From 436a850dd9cac09bf88e12e20cc79408b1d29788 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 15 May 2016 19:50:14 +0200 Subject: netfilter: helper: avoid extra expectation iterations on unregister The expectation table is not duplicated per net namespace anymore, so we can move the expectation table and conntrack table iteration out of the per-net loop. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_helper.c | 61 +++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 29 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index f703adb7e5f7..7ba16e9c69fa 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -388,13 +388,40 @@ EXPORT_SYMBOL_GPL(nf_conntrack_helper_register); static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, struct net *net) +{ + struct nf_conntrack_tuple_hash *h; + const struct hlist_nulls_node *nn; + int cpu; + + /* Get rid of expecteds, set helpers to NULL. */ + for_each_possible_cpu(cpu) { + struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); + + spin_lock_bh(&pcpu->lock); + hlist_nulls_for_each_entry(h, nn, &pcpu->unconfirmed, hnnode) + unhelp(h, me); + spin_unlock_bh(&pcpu->lock); + } +} + +void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) { struct nf_conntrack_tuple_hash *h; struct nf_conntrack_expect *exp; const struct hlist_node *next; const struct hlist_nulls_node *nn; + struct net *net; unsigned int i; - int cpu; + + mutex_lock(&nf_ct_helper_mutex); + hlist_del_rcu(&me->hnode); + nf_ct_helper_count--; + mutex_unlock(&nf_ct_helper_mutex); + + /* Make sure every nothing is still using the helper unless its a + * connection in the hash. + */ + synchronize_rcu(); /* Get rid of expectations */ spin_lock_bh(&nf_conntrack_expect_lock); @@ -414,15 +441,11 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, } spin_unlock_bh(&nf_conntrack_expect_lock); - /* Get rid of expecteds, set helpers to NULL. */ - for_each_possible_cpu(cpu) { - struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); + rtnl_lock(); + for_each_net(net) + __nf_conntrack_helper_unregister(me, net); + rtnl_unlock(); - spin_lock_bh(&pcpu->lock); - hlist_nulls_for_each_entry(h, nn, &pcpu->unconfirmed, hnnode) - unhelp(h, me); - spin_unlock_bh(&pcpu->lock); - } local_bh_disable(); for (i = 0; i < nf_conntrack_htable_size; i++) { nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); @@ -434,26 +457,6 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, } local_bh_enable(); } - -void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) -{ - struct net *net; - - mutex_lock(&nf_ct_helper_mutex); - hlist_del_rcu(&me->hnode); - nf_ct_helper_count--; - mutex_unlock(&nf_ct_helper_mutex); - - /* Make sure every nothing is still using the helper unless its a - * connection in the hash. - */ - synchronize_rcu(); - - rtnl_lock(); - for_each_net(net) - __nf_conntrack_helper_unregister(me, net); - rtnl_unlock(); -} EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); static struct nf_ct_ext_type helper_extend __read_mostly = { -- cgit v1.2.3 From edb09eb17ed89eaa82a52dd306beac93e292b485 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Jun 2016 09:37:16 -0700 Subject: net: sched: do not acquire qdisc spinlock in qdisc/class stats dump Large tc dumps (tc -s {qdisc|class} sh dev ethX) done by Google BwE host agent [1] are problematic at scale : For each qdisc/class found in the dump, we currently lock the root qdisc spinlock in order to get stats. Sampling stats every 5 seconds from thousands of HTB classes is a challenge when the root qdisc spinlock is under high pressure. Not only the dumps take time, they also slow down the fast path (queue/dequeue packets) by 10 % to 20 % in some cases. An audit of existing qdiscs showed that sch_fq_codel is the only qdisc that might need the qdisc lock in fq_codel_dump_stats() and fq_codel_dump_class_stats() In v2 of this patch, I now use the Qdisc running seqcount to provide consistent reads of packets/bytes counters, regardless of 32/64 bit arches. I also changed rate estimators to use the same infrastructure so that they no longer need to lock root qdisc lock. [1] http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/43838.pdf Signed-off-by: Eric Dumazet Cc: Cong Wang Cc: Jamal Hadi Salim Cc: John Fastabend Cc: Kevin Athey Cc: Xiaotian Pei Signed-off-by: David S. Miller --- Documentation/networking/gen_stats.txt | 2 +- include/net/gen_stats.h | 12 ++++++++---- include/net/sch_generic.h | 8 ++++++++ net/core/gen_estimator.c | 24 ++++++++++++++++-------- net/core/gen_stats.c | 34 +++++++++++++++++++++++----------- net/netfilter/xt_RATEEST.c | 2 +- net/sched/act_api.c | 4 ++-- net/sched/act_police.c | 3 ++- net/sched/sch_api.c | 21 +++++++++++---------- net/sched/sch_atm.c | 3 ++- net/sched/sch_cbq.c | 9 ++++++--- net/sched/sch_drr.c | 9 ++++++--- net/sched/sch_fq_codel.c | 15 +++++++++++---- net/sched/sch_hfsc.c | 10 +++++----- net/sched/sch_htb.c | 11 ++++++----- net/sched/sch_mq.c | 2 +- net/sched/sch_mqprio.c | 11 +++++++---- net/sched/sch_multiq.c | 3 ++- net/sched/sch_prio.c | 3 ++- net/sched/sch_qfq.c | 9 ++++++--- 20 files changed, 126 insertions(+), 69 deletions(-) (limited to 'net/netfilter') diff --git a/Documentation/networking/gen_stats.txt b/Documentation/networking/gen_stats.txt index ff630a87b511..179b18ce45ff 100644 --- a/Documentation/networking/gen_stats.txt +++ b/Documentation/networking/gen_stats.txt @@ -21,7 +21,7 @@ struct mystruct { ... }; -Update statistics: +Update statistics, in dequeue() methods only, (while owning qdisc->running) mystruct->tstats.packet++; mystruct->qstats.backlog += skb->pkt_len; diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h index 610cd397890e..231e121cc7d9 100644 --- a/include/net/gen_stats.h +++ b/include/net/gen_stats.h @@ -33,10 +33,12 @@ int gnet_stats_start_copy_compat(struct sk_buff *skb, int type, spinlock_t *lock, struct gnet_dump *d, int padattr); -int gnet_stats_copy_basic(struct gnet_dump *d, +int gnet_stats_copy_basic(const seqcount_t *running, + struct gnet_dump *d, struct gnet_stats_basic_cpu __percpu *cpu, struct gnet_stats_basic_packed *b); -void __gnet_stats_copy_basic(struct gnet_stats_basic_packed *bstats, +void __gnet_stats_copy_basic(const seqcount_t *running, + struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu, struct gnet_stats_basic_packed *b); int gnet_stats_copy_rate_est(struct gnet_dump *d, @@ -52,13 +54,15 @@ int gnet_stats_finish_copy(struct gnet_dump *d); int gen_new_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu_bstats, struct gnet_stats_rate_est64 *rate_est, - spinlock_t *stats_lock, struct nlattr *opt); + spinlock_t *stats_lock, + seqcount_t *running, struct nlattr *opt); void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_rate_est64 *rate_est); int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu_bstats, struct gnet_stats_rate_est64 *rate_est, - spinlock_t *stats_lock, struct nlattr *opt); + spinlock_t *stats_lock, + seqcount_t *running, struct nlattr *opt); bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats, const struct gnet_stats_rate_est64 *rate_est); #endif diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index bff8d895ef8a..c4f5749342ec 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -314,6 +314,14 @@ static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc) return qdisc_lock(root); } +static inline seqcount_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc) +{ + struct Qdisc *root = qdisc_root_sleeping(qdisc); + + ASSERT_RTNL(); + return &root->running; +} + static inline struct net_device *qdisc_dev(const struct Qdisc *qdisc) { return qdisc->dev_queue->dev; diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 4573d81093fe..cad8e791f28e 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -84,6 +84,7 @@ struct gen_estimator struct gnet_stats_basic_packed *bstats; struct gnet_stats_rate_est64 *rate_est; spinlock_t *stats_lock; + seqcount_t *running; int ewma_log; u32 last_packets; unsigned long avpps; @@ -121,26 +122,28 @@ static void est_timer(unsigned long arg) unsigned long rate; u64 brate; - spin_lock(e->stats_lock); + if (e->stats_lock) + spin_lock(e->stats_lock); read_lock(&est_lock); if (e->bstats == NULL) goto skip; - __gnet_stats_copy_basic(&b, e->cpu_bstats, e->bstats); + __gnet_stats_copy_basic(e->running, &b, e->cpu_bstats, e->bstats); brate = (b.bytes - e->last_bytes)<<(7 - idx); e->last_bytes = b.bytes; e->avbps += (brate >> e->ewma_log) - (e->avbps >> e->ewma_log); - e->rate_est->bps = (e->avbps+0xF)>>5; + WRITE_ONCE(e->rate_est->bps, (e->avbps + 0xF) >> 5); rate = b.packets - e->last_packets; rate <<= (7 - idx); e->last_packets = b.packets; e->avpps += (rate >> e->ewma_log) - (e->avpps >> e->ewma_log); - e->rate_est->pps = (e->avpps + 0xF) >> 5; + WRITE_ONCE(e->rate_est->pps, (e->avpps + 0xF) >> 5); skip: read_unlock(&est_lock); - spin_unlock(e->stats_lock); + if (e->stats_lock) + spin_unlock(e->stats_lock); } if (!list_empty(&elist[idx].list)) @@ -194,6 +197,7 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats * @cpu_bstats: bstats per cpu * @rate_est: rate estimator statistics * @stats_lock: statistics lock + * @running: qdisc running seqcount * @opt: rate estimator configuration TLV * * Creates a new rate estimator with &bstats as source and &rate_est @@ -209,6 +213,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu_bstats, struct gnet_stats_rate_est64 *rate_est, spinlock_t *stats_lock, + seqcount_t *running, struct nlattr *opt) { struct gen_estimator *est; @@ -226,12 +231,13 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, if (est == NULL) return -ENOBUFS; - __gnet_stats_copy_basic(&b, cpu_bstats, bstats); + __gnet_stats_copy_basic(running, &b, cpu_bstats, bstats); idx = parm->interval + 2; est->bstats = bstats; est->rate_est = rate_est; est->stats_lock = stats_lock; + est->running = running; est->ewma_log = parm->ewma_log; est->last_bytes = b.bytes; est->avbps = rate_est->bps<<5; @@ -291,6 +297,7 @@ EXPORT_SYMBOL(gen_kill_estimator); * @cpu_bstats: bstats per cpu * @rate_est: rate estimator statistics * @stats_lock: statistics lock + * @running: qdisc running seqcount (might be NULL) * @opt: rate estimator configuration TLV * * Replaces the configuration of a rate estimator by calling @@ -301,10 +308,11 @@ EXPORT_SYMBOL(gen_kill_estimator); int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu_bstats, struct gnet_stats_rate_est64 *rate_est, - spinlock_t *stats_lock, struct nlattr *opt) + spinlock_t *stats_lock, + seqcount_t *running, struct nlattr *opt) { gen_kill_estimator(bstats, rate_est); - return gen_new_estimator(bstats, cpu_bstats, rate_est, stats_lock, opt); + return gen_new_estimator(bstats, cpu_bstats, rate_est, stats_lock, running, opt); } EXPORT_SYMBOL(gen_replace_estimator); diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index f96ee8b9478d..d9c210caff32 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -32,10 +32,11 @@ gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size, int padattr) return 0; nla_put_failure: + if (d->lock) + spin_unlock_bh(d->lock); kfree(d->xstats); d->xstats = NULL; d->xstats_len = 0; - spin_unlock_bh(d->lock); return -1; } @@ -65,15 +66,16 @@ gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type, { memset(d, 0, sizeof(*d)); - spin_lock_bh(lock); - d->lock = lock; if (type) d->tail = (struct nlattr *)skb_tail_pointer(skb); d->skb = skb; d->compat_tc_stats = tc_stats_type; d->compat_xstats = xstats_type; d->padattr = padattr; - + if (lock) { + d->lock = lock; + spin_lock_bh(lock); + } if (d->tail) return gnet_stats_copy(d, type, NULL, 0, padattr); @@ -126,16 +128,23 @@ __gnet_stats_copy_basic_cpu(struct gnet_stats_basic_packed *bstats, } void -__gnet_stats_copy_basic(struct gnet_stats_basic_packed *bstats, +__gnet_stats_copy_basic(const seqcount_t *running, + struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu, struct gnet_stats_basic_packed *b) { + unsigned int seq; + if (cpu) { __gnet_stats_copy_basic_cpu(bstats, cpu); - } else { + return; + } + do { + if (running) + seq = read_seqcount_begin(running); bstats->bytes = b->bytes; bstats->packets = b->packets; - } + } while (running && read_seqcount_retry(running, seq)); } EXPORT_SYMBOL(__gnet_stats_copy_basic); @@ -152,13 +161,14 @@ EXPORT_SYMBOL(__gnet_stats_copy_basic); * if the room in the socket buffer was not sufficient. */ int -gnet_stats_copy_basic(struct gnet_dump *d, +gnet_stats_copy_basic(const seqcount_t *running, + struct gnet_dump *d, struct gnet_stats_basic_cpu __percpu *cpu, struct gnet_stats_basic_packed *b) { struct gnet_stats_basic_packed bstats = {0}; - __gnet_stats_copy_basic(&bstats, cpu, b); + __gnet_stats_copy_basic(running, &bstats, cpu, b); if (d->compat_tc_stats) { d->tc_stats.bytes = bstats.bytes; @@ -328,8 +338,9 @@ gnet_stats_copy_app(struct gnet_dump *d, void *st, int len) return 0; err_out: + if (d->lock) + spin_unlock_bh(d->lock); d->xstats_len = 0; - spin_unlock_bh(d->lock); return -1; } EXPORT_SYMBOL(gnet_stats_copy_app); @@ -363,10 +374,11 @@ gnet_stats_finish_copy(struct gnet_dump *d) return -1; } + if (d->lock) + spin_unlock_bh(d->lock); kfree(d->xstats); d->xstats = NULL; d->xstats_len = 0; - spin_unlock_bh(d->lock); return 0; } EXPORT_SYMBOL(gnet_stats_finish_copy); diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index 604df6fae6fc..515131f9e021 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -137,7 +137,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) cfg.est.ewma_log = info->ewma_log; ret = gen_new_estimator(&est->bstats, NULL, &est->rstats, - &est->lock, &cfg.opt); + &est->lock, NULL, &cfg.opt); if (ret < 0) goto err2; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 719bc2e85852..b6db56ec8117 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -287,7 +287,7 @@ err2: if (est) { err = gen_new_estimator(&p->tcfc_bstats, p->cpu_bstats, &p->tcfc_rate_est, - &p->tcfc_lock, est); + &p->tcfc_lock, NULL, est); if (err) { free_percpu(p->cpu_qstats); goto err2; @@ -671,7 +671,7 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a, if (err < 0) goto errout; - if (gnet_stats_copy_basic(&d, p->cpu_bstats, &p->tcfc_bstats) < 0 || + if (gnet_stats_copy_basic(NULL, &d, p->cpu_bstats, &p->tcfc_bstats) < 0 || gnet_stats_copy_rate_est(&d, &p->tcfc_bstats, &p->tcfc_rate_est) < 0 || gnet_stats_copy_queue(&d, p->cpu_qstats, diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 820b11686f85..bcb31142556b 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -185,7 +185,8 @@ override: if (est) { err = gen_replace_estimator(&police->tcf_bstats, NULL, &police->tcf_rate_est, - &police->tcf_lock, est); + &police->tcf_lock, + NULL, est); if (err) goto failure_unlock; } else if (tb[TCA_POLICE_AVRATE] && diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index ddf047df5361..d4a8bbfcc953 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -982,7 +982,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, rcu_assign_pointer(sch->stab, stab); } if (tca[TCA_RATE]) { - spinlock_t *root_lock; + seqcount_t *running; err = -EOPNOTSUPP; if (sch->flags & TCQ_F_MQROOT) @@ -991,14 +991,15 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, if ((sch->parent != TC_H_ROOT) && !(sch->flags & TCQ_F_INGRESS) && (!p || !(p->flags & TCQ_F_MQROOT))) - root_lock = qdisc_root_sleeping_lock(sch); + running = qdisc_root_sleeping_running(sch); else - root_lock = qdisc_lock(sch); + running = &sch->running; err = gen_new_estimator(&sch->bstats, sch->cpu_bstats, &sch->rate_est, - root_lock, + NULL, + running, tca[TCA_RATE]); if (err) goto err_out4; @@ -1061,7 +1062,8 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) gen_replace_estimator(&sch->bstats, sch->cpu_bstats, &sch->rate_est, - qdisc_root_sleeping_lock(sch), + NULL, + qdisc_root_sleeping_running(sch), tca[TCA_RATE]); } out: @@ -1369,8 +1371,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, goto nla_put_failure; if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, - qdisc_root_sleeping_lock(q), &d, - TCA_PAD) < 0) + NULL, &d, TCA_PAD) < 0) goto nla_put_failure; if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0) @@ -1381,7 +1382,8 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, cpu_qstats = q->cpu_qstats; } - if (gnet_stats_copy_basic(&d, cpu_bstats, &q->bstats) < 0 || + if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q), + &d, cpu_bstats, &q->bstats) < 0 || gnet_stats_copy_rate_est(&d, &q->bstats, &q->rate_est) < 0 || gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0) goto nla_put_failure; @@ -1684,8 +1686,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, goto nla_put_failure; if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, - qdisc_root_sleeping_lock(q), &d, - TCA_PAD) < 0) + NULL, &d, TCA_PAD) < 0) goto nla_put_failure; if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0) diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 1911af3ca7c0..34f8f79e56d5 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -637,7 +637,8 @@ atm_tc_dump_class_stats(struct Qdisc *sch, unsigned long arg, { struct atm_flow_data *flow = (struct atm_flow_data *)arg; - if (gnet_stats_copy_basic(d, NULL, &flow->bstats) < 0 || + if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), + d, NULL, &flow->bstats) < 0 || gnet_stats_copy_queue(d, NULL, &flow->qstats, flow->q->q.qlen) < 0) return -1; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index baafddf229ce..1b8128fb845d 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1600,7 +1600,8 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg, if (cl->undertime != PSCHED_PASTPERFECT) cl->xstats.undertime = cl->undertime - q->now; - if (gnet_stats_copy_basic(d, NULL, &cl->bstats) < 0 || + if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), + d, NULL, &cl->bstats) < 0 || gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 || gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->q->q.qlen) < 0) return -1; @@ -1755,7 +1756,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t if (tca[TCA_RATE]) { err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est, - qdisc_root_sleeping_lock(sch), + NULL, + qdisc_root_sleeping_running(sch), tca[TCA_RATE]); if (err) { qdisc_put_rtab(rtab); @@ -1848,7 +1850,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t if (tca[TCA_RATE]) { err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est, - qdisc_root_sleeping_lock(sch), + NULL, + qdisc_root_sleeping_running(sch), tca[TCA_RATE]); if (err) { kfree(cl); diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index a63e879e8975..1b7e1a27773d 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -91,7 +91,8 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (tca[TCA_RATE]) { err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est, - qdisc_root_sleeping_lock(sch), + NULL, + qdisc_root_sleeping_running(sch), tca[TCA_RATE]); if (err) return err; @@ -119,7 +120,8 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (tca[TCA_RATE]) { err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est, - qdisc_root_sleeping_lock(sch), + NULL, + qdisc_root_sleeping_running(sch), tca[TCA_RATE]); if (err) { qdisc_destroy(cl->qdisc); @@ -279,7 +281,8 @@ static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg, if (qlen) xstats.deficit = cl->deficit; - if (gnet_stats_copy_basic(d, NULL, &cl->bstats) < 0 || + if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), + d, NULL, &cl->bstats) < 0 || gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 || gnet_stats_copy_queue(d, NULL, &cl->qdisc->qstats, qlen) < 0) return -1; diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 6883a8971562..1daa54237f4e 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -566,11 +566,13 @@ static int fq_codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d) st.qdisc_stats.memory_usage = q->memory_usage; st.qdisc_stats.drop_overmemory = q->drop_overmemory; + sch_tree_lock(sch); list_for_each(pos, &q->new_flows) st.qdisc_stats.new_flows_len++; list_for_each(pos, &q->old_flows) st.qdisc_stats.old_flows_len++; + sch_tree_unlock(sch); return gnet_stats_copy_app(d, &st, sizeof(st)); } @@ -624,7 +626,7 @@ static int fq_codel_dump_class_stats(struct Qdisc *sch, unsigned long cl, if (idx < q->flows_cnt) { const struct fq_codel_flow *flow = &q->flows[idx]; - const struct sk_buff *skb = flow->head; + const struct sk_buff *skb; memset(&xstats, 0, sizeof(xstats)); xstats.type = TCA_FQ_CODEL_XSTATS_CLASS; @@ -642,9 +644,14 @@ static int fq_codel_dump_class_stats(struct Qdisc *sch, unsigned long cl, codel_time_to_us(delta) : -codel_time_to_us(-delta); } - while (skb) { - qs.qlen++; - skb = skb->next; + if (flow->head) { + sch_tree_lock(sch); + skb = flow->head; + while (skb) { + qs.qlen++; + skb = skb->next; + } + sch_tree_unlock(sch); } qs.backlog = q->backlogs[idx]; qs.drops = flow->dropped; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index d783d7cc3348..74813dd49053 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1015,11 +1015,10 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, cur_time = psched_get_time(); if (tca[TCA_RATE]) { - spinlock_t *lock = qdisc_root_sleeping_lock(sch); - err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est, - lock, + NULL, + qdisc_root_sleeping_running(sch), tca[TCA_RATE]); if (err) return err; @@ -1068,7 +1067,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (tca[TCA_RATE]) { err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est, - qdisc_root_sleeping_lock(sch), + NULL, + qdisc_root_sleeping_running(sch), tca[TCA_RATE]); if (err) { kfree(cl); @@ -1373,7 +1373,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg, xstats.work = cl->cl_total; xstats.rtwork = cl->cl_cumul; - if (gnet_stats_copy_basic(d, NULL, &cl->bstats) < 0 || + if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 || gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 || gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->qdisc->q.qlen) < 0) return -1; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index d4b4218af6b1..2b057649f24b 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1141,7 +1141,8 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) cl->xstats.tokens = PSCHED_NS2TICKS(cl->tokens); cl->xstats.ctokens = PSCHED_NS2TICKS(cl->ctokens); - if (gnet_stats_copy_basic(d, NULL, &cl->bstats) < 0 || + if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), + d, NULL, &cl->bstats) < 0 || gnet_stats_copy_rate_est(d, NULL, &cl->rate_est) < 0 || gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0) return -1; @@ -1395,7 +1396,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (htb_rate_est || tca[TCA_RATE]) { err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est, - qdisc_root_sleeping_lock(sch), + NULL, + qdisc_root_sleeping_running(sch), tca[TCA_RATE] ? : &est.nla); if (err) { kfree(cl); @@ -1457,11 +1459,10 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, parent->children++; } else { if (tca[TCA_RATE]) { - spinlock_t *lock = qdisc_root_sleeping_lock(sch); - err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est, - lock, + NULL, + qdisc_root_sleeping_running(sch), tca[TCA_RATE]); if (err) return err; diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index 56a77b878eb3..b9439827c172 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -199,7 +199,7 @@ static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl, struct netdev_queue *dev_queue = mq_queue_get(sch, cl); sch = dev_queue->qdisc_sleeping; - if (gnet_stats_copy_basic(d, NULL, &sch->bstats) < 0 || + if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 || gnet_stats_copy_queue(d, NULL, &sch->qstats, sch->q.qlen) < 0) return -1; return 0; diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index b8002ce3d010..549c66359924 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -342,7 +342,8 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, * hold here is the look on dev_queue->qdisc_sleeping * also acquired below. */ - spin_unlock_bh(d->lock); + if (d->lock) + spin_unlock_bh(d->lock); for (i = tc.offset; i < tc.offset + tc.count; i++) { struct netdev_queue *q = netdev_get_tx_queue(dev, i); @@ -359,15 +360,17 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, spin_unlock_bh(qdisc_lock(qdisc)); } /* Reclaim root sleeping lock before completing stats */ - spin_lock_bh(d->lock); - if (gnet_stats_copy_basic(d, NULL, &bstats) < 0 || + if (d->lock) + spin_lock_bh(d->lock); + if (gnet_stats_copy_basic(NULL, d, NULL, &bstats) < 0 || gnet_stats_copy_queue(d, NULL, &qstats, qlen) < 0) return -1; } else { struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl); sch = dev_queue->qdisc_sleeping; - if (gnet_stats_copy_basic(d, NULL, &sch->bstats) < 0 || + if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), + d, NULL, &sch->bstats) < 0 || gnet_stats_copy_queue(d, NULL, &sch->qstats, sch->q.qlen) < 0) return -1; diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index bcdd54bb101c..21e69d2e8347 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -356,7 +356,8 @@ static int multiq_dump_class_stats(struct Qdisc *sch, unsigned long cl, struct Qdisc *cl_q; cl_q = q->queues[cl - 1]; - if (gnet_stats_copy_basic(d, NULL, &cl_q->bstats) < 0 || + if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), + d, NULL, &cl_q->bstats) < 0 || gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0) return -1; diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index fee1b15506b2..06eca7060683 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -319,7 +319,8 @@ static int prio_dump_class_stats(struct Qdisc *sch, unsigned long cl, struct Qdisc *cl_q; cl_q = q->queues[cl - 1]; - if (gnet_stats_copy_basic(d, NULL, &cl_q->bstats) < 0 || + if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), + d, NULL, &cl_q->bstats) < 0 || gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0) return -1; diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 8d2d8d953432..85d41979d825 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -460,7 +460,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (tca[TCA_RATE]) { err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est, - qdisc_root_sleeping_lock(sch), + NULL, + qdisc_root_sleeping_running(sch), tca[TCA_RATE]); if (err) return err; @@ -486,7 +487,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (tca[TCA_RATE]) { err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est, - qdisc_root_sleeping_lock(sch), + NULL, + qdisc_root_sleeping_running(sch), tca[TCA_RATE]); if (err) goto destroy_class; @@ -663,7 +665,8 @@ static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg, xstats.weight = cl->agg->class_weight; xstats.lmax = cl->agg->lmax; - if (gnet_stats_copy_basic(d, NULL, &cl->bstats) < 0 || + if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), + d, NULL, &cl->bstats) < 0 || gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 || gnet_stats_copy_queue(d, NULL, &cl->qdisc->qstats, cl->qdisc->q.qlen) < 0) -- cgit v1.2.3 From a6d0bae14858a43ab9d76d6332d7c3f2a618a6a2 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 2 Jun 2016 10:59:56 +0800 Subject: netfilter: x_tables: fix possible ZERO_SIZE_PTR pointer dereferencing error. Since we cannot make sure that the 'hook_mask' will always be none zero here. If it equals to zero, the num_hooks will be zero too, and then kmalloc() will return ZERO_SIZE_PTR, which is (void *)16. Then the following error check will fails: ops = kmalloc(sizeof(*ops) * num_hooks, GFP_KERNEL); if (ops == NULL) return ERR_PTR(-ENOMEM); So this patch will fix this with just doing the zero check before kmalloc() is called. Maybe the case above will never happen here, but in theory. Signed-off-by: Xiubo Li Signed-off-by: Pablo Neira Ayuso --- net/netfilter/x_tables.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/netfilter') diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index c69c892231d7..8aff34e8737c 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1460,6 +1460,9 @@ xt_hook_ops_alloc(const struct xt_table *table, nf_hookfn *fn) uint8_t hooknum; struct nf_hook_ops *ops; + if (!num_hooks) + return ERR_PTR(-EINVAL); + ops = kmalloc(sizeof(*ops) * num_hooks, GFP_KERNEL); if (ops == NULL) return ERR_PTR(-ENOMEM); -- cgit v1.2.3 From f3bb53338e0965c3084c185020e821ac49015832 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Wed, 8 Jun 2016 20:43:17 +0800 Subject: netfilter: nf_log: handle NFPROTO_INET properly in nf_logger_[find_get|put] When we request NFPROTO_INET, it means both NFPROTO_IPV4 and NFPROTO_IPV6. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_log.c | 20 ++++++++++++++++++++ net/netfilter/nft_log.c | 21 +-------------------- 2 files changed, 21 insertions(+), 20 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index a5d41dfa9f05..73b845d3cd33 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -159,6 +159,20 @@ int nf_logger_find_get(int pf, enum nf_log_type type) struct nf_logger *logger; int ret = -ENOENT; + if (pf == NFPROTO_INET) { + ret = nf_logger_find_get(NFPROTO_IPV4, type); + if (ret < 0) + return ret; + + ret = nf_logger_find_get(NFPROTO_IPV6, type); + if (ret < 0) { + nf_logger_put(NFPROTO_IPV4, type); + return ret; + } + + return 0; + } + if (rcu_access_pointer(loggers[pf][type]) == NULL) request_module("nf-logger-%u-%u", pf, type); @@ -179,6 +193,12 @@ void nf_logger_put(int pf, enum nf_log_type type) { struct nf_logger *logger; + if (pf == NFPROTO_INET) { + nf_logger_put(NFPROTO_IPV4, type); + nf_logger_put(NFPROTO_IPV6, type); + return; + } + BUG_ON(loggers[pf][type] == NULL); rcu_read_lock(); diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index 319c22b4bca2..713d66837705 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -52,7 +52,6 @@ static int nft_log_init(const struct nft_ctx *ctx, struct nft_log *priv = nft_expr_priv(expr); struct nf_loginfo *li = &priv->loginfo; const struct nlattr *nla; - int ret; nla = tb[NFTA_LOG_PREFIX]; if (nla != NULL) { @@ -97,19 +96,6 @@ static int nft_log_init(const struct nft_ctx *ctx, break; } - if (ctx->afi->family == NFPROTO_INET) { - ret = nf_logger_find_get(NFPROTO_IPV4, li->type); - if (ret < 0) - return ret; - - ret = nf_logger_find_get(NFPROTO_IPV6, li->type); - if (ret < 0) { - nf_logger_put(NFPROTO_IPV4, li->type); - return ret; - } - return 0; - } - return nf_logger_find_get(ctx->afi->family, li->type); } @@ -122,12 +108,7 @@ static void nft_log_destroy(const struct nft_ctx *ctx, if (priv->prefix != nft_log_null_prefix) kfree(priv->prefix); - if (ctx->afi->family == NFPROTO_INET) { - nf_logger_put(NFPROTO_IPV4, li->type); - nf_logger_put(NFPROTO_IPV6, li->type); - } else { - nf_logger_put(ctx->afi->family, li->type); - } + nf_logger_put(ctx->afi->family, li->type); } static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr) -- cgit v1.2.3 From 36f959c491abc7e0acf94b631a6d7a3e2e3699b0 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Wed, 8 Jun 2016 20:43:19 +0800 Subject: netfilter: xt_TRACE: add explicitly nf_logger_find_get call Consider such situation, if nf_log_ipv4 kernel module is not installed, and the user add a following iptables rule: # iptables -t raw -I PREROUTING -j TRACE There will be no trace log generated until the user install nf_log_ipv4 module manully. So we should add request related nf_log module appropriately here. Signed-off-by: Liping Zhang Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_TRACE.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c index df48967af382..858d189a1303 100644 --- a/net/netfilter/xt_TRACE.c +++ b/net/netfilter/xt_TRACE.c @@ -4,12 +4,23 @@ #include #include +#include MODULE_DESCRIPTION("Xtables: packet flow tracing"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_TRACE"); MODULE_ALIAS("ip6t_TRACE"); +static int trace_tg_check(const struct xt_tgchk_param *par) +{ + return nf_logger_find_get(par->family, NF_LOG_TYPE_LOG); +} + +static void trace_tg_destroy(const struct xt_tgdtor_param *par) +{ + nf_logger_put(par->family, NF_LOG_TYPE_LOG); +} + static unsigned int trace_tg(struct sk_buff *skb, const struct xt_action_param *par) { @@ -18,12 +29,14 @@ trace_tg(struct sk_buff *skb, const struct xt_action_param *par) } static struct xt_target trace_tg_reg __read_mostly = { - .name = "TRACE", - .revision = 0, - .family = NFPROTO_UNSPEC, - .table = "raw", - .target = trace_tg, - .me = THIS_MODULE, + .name = "TRACE", + .revision = 0, + .family = NFPROTO_UNSPEC, + .table = "raw", + .target = trace_tg, + .checkentry = trace_tg_check, + .destroy = trace_tg_destroy, + .me = THIS_MODULE, }; static int __init trace_tg_init(void) -- cgit v1.2.3 From 5a75cdebabc4576ca31f497a9272ac558421b119 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 10 Jun 2016 22:25:22 +0200 Subject: netfilter: conntrack: align nf_conn on cacheline boundary increases struct size by 32 bytes (288 -> 320), but it is the right thing, else any attempt to (re-)arrange nf_conn members by cacheline won't work. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index db2312eeb2a4..2903bb43547c 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1731,7 +1731,7 @@ int nf_conntrack_init_start(void) nf_conntrack_cachep = kmem_cache_create("nf_conntrack", sizeof(struct nf_conn), 0, - SLAB_DESTROY_BY_RCU, NULL); + SLAB_DESTROY_BY_RCU | SLAB_HWCACHE_ALIGN, NULL); if (!nf_conntrack_cachep) goto err_cachep; -- cgit v1.2.3 From 7e53e7f8ca24e01292d114373f35b2999301d879 Mon Sep 17 00:00:00 2001 From: Shivani Bhardwaj Date: Sun, 12 Jun 2016 00:26:10 +0530 Subject: netfilter: nf_log: Remove NULL check If 'logger' was NULL, there would be a direct jump to the label 'out', since it has already been checked for NULL, remove this unnecessary check. Signed-off-by: Shivani Bhardwaj Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 73b845d3cd33..18e325ce6542 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -181,7 +181,7 @@ int nf_logger_find_get(int pf, enum nf_log_type type) if (logger == NULL) goto out; - if (logger && try_module_get(logger->me)) + if (try_module_get(logger->me)) ret = 0; out: rcu_read_unlock(); -- cgit v1.2.3 From 6c8dee9842461e6ee6eb46081478999b3d5cb297 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 11 Jun 2016 21:57:35 +0200 Subject: netfilter: move zone info into struct nf_conn Curently we store zone information as a conntrack extension. This has one drawback: for every lookup we need to fetch the zone data from the extension area. This change place the zone data directly into the main conntrack object structure and then removes the zone conntrack extension. The zone data is just 4 bytes, it fits into a padding hole before the tuplehash info, so we do not even increase the nf_conn structure size. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 3 +++ include/net/netfilter/nf_conntrack_extend.h | 4 ---- include/net/netfilter/nf_conntrack_zones.h | 33 ++++++++++------------------- net/netfilter/nf_conntrack_core.c | 33 ++--------------------------- 4 files changed, 16 insertions(+), 57 deletions(-) (limited to 'net/netfilter') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index dd78bea227c8..9c0ed3d7af89 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -85,6 +85,9 @@ struct nf_conn { spinlock_t lock; u16 cpu; +#ifdef CONFIG_NF_CONNTRACK_ZONES + struct nf_conntrack_zone zone; +#endif /* XXX should I move this to the tail ? - Y.K */ /* These are my tuples; original and reply */ struct nf_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX]; diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index 55d15049ab2f..b925395fa5ed 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -15,9 +15,6 @@ enum nf_ct_ext_id { #ifdef CONFIG_NF_CONNTRACK_EVENTS NF_CT_EXT_ECACHE, #endif -#ifdef CONFIG_NF_CONNTRACK_ZONES - NF_CT_EXT_ZONE, -#endif #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP NF_CT_EXT_TSTAMP, #endif @@ -38,7 +35,6 @@ enum nf_ct_ext_id { #define NF_CT_EXT_SEQADJ_TYPE struct nf_conn_seqadj #define NF_CT_EXT_ACCT_TYPE struct nf_conn_acct #define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache -#define NF_CT_EXT_ZONE_TYPE struct nf_conntrack_zone #define NF_CT_EXT_TSTAMP_TYPE struct nf_conn_tstamp #define NF_CT_EXT_TIMEOUT_TYPE struct nf_conn_timeout #define NF_CT_EXT_LABELS_TYPE struct nf_conn_labels diff --git a/include/net/netfilter/nf_conntrack_zones.h b/include/net/netfilter/nf_conntrack_zones.h index bd4692690914..64a718b60839 100644 --- a/include/net/netfilter/nf_conntrack_zones.h +++ b/include/net/netfilter/nf_conntrack_zones.h @@ -9,12 +9,11 @@ static inline const struct nf_conntrack_zone * nf_ct_zone(const struct nf_conn *ct) { - const struct nf_conntrack_zone *nf_ct_zone = NULL; - #ifdef CONFIG_NF_CONNTRACK_ZONES - nf_ct_zone = nf_ct_ext_find(ct, NF_CT_EXT_ZONE); + return &ct->zone; +#else + return &nf_ct_zone_dflt; #endif - return nf_ct_zone ? nf_ct_zone : &nf_ct_zone_dflt; } static inline const struct nf_conntrack_zone * @@ -31,32 +30,22 @@ static inline const struct nf_conntrack_zone * nf_ct_zone_tmpl(const struct nf_conn *tmpl, const struct sk_buff *skb, struct nf_conntrack_zone *tmp) { - const struct nf_conntrack_zone *zone; - +#ifdef CONFIG_NF_CONNTRACK_ZONES if (!tmpl) return &nf_ct_zone_dflt; - zone = nf_ct_zone(tmpl); - if (zone->flags & NF_CT_FLAG_MARK) - zone = nf_ct_zone_init(tmp, skb->mark, zone->dir, 0); - - return zone; + if (tmpl->zone.flags & NF_CT_FLAG_MARK) + return nf_ct_zone_init(tmp, skb->mark, tmpl->zone.dir, 0); +#endif + return nf_ct_zone(tmpl); } -static inline int nf_ct_zone_add(struct nf_conn *ct, gfp_t flags, - const struct nf_conntrack_zone *info) +static inline void nf_ct_zone_add(struct nf_conn *ct, + const struct nf_conntrack_zone *zone) { #ifdef CONFIG_NF_CONNTRACK_ZONES - struct nf_conntrack_zone *nf_ct_zone; - - nf_ct_zone = nf_ct_ext_add(ct, NF_CT_EXT_ZONE, flags); - if (!nf_ct_zone) - return -ENOMEM; - - nf_ct_zone_init(nf_ct_zone, info->id, info->dir, - info->flags); + ct->zone = *zone; #endif - return 0; } static inline bool nf_ct_zone_matches_dir(const struct nf_conntrack_zone *zone, diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 2903bb43547c..a459176c3253 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -327,16 +327,10 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net, tmpl->status = IPS_TEMPLATE; write_pnet(&tmpl->ct_net, net); - - if (nf_ct_zone_add(tmpl, flags, zone) < 0) - goto out_free; - + nf_ct_zone_add(tmpl, zone); atomic_set(&tmpl->ct_general.use, 0); return tmpl; -out_free: - kfree(tmpl); - return NULL; } EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc); @@ -929,16 +923,13 @@ __nf_conntrack_alloc(struct net *net, offsetof(struct nf_conn, proto) - offsetof(struct nf_conn, __nfct_init_offset[0])); - if (zone && nf_ct_zone_add(ct, GFP_ATOMIC, zone) < 0) - goto out_free; + nf_ct_zone_add(ct, zone); /* Because we use RCU lookups, we set ct_general.use to zero before * this is inserted in any list. */ atomic_set(&ct->ct_general.use, 0); return ct; -out_free: - kmem_cache_free(nf_conntrack_cachep, ct); out: atomic_dec(&net->ct.count); return ERR_PTR(-ENOMEM); @@ -1342,14 +1333,6 @@ bool __nf_ct_kill_acct(struct nf_conn *ct, } EXPORT_SYMBOL_GPL(__nf_ct_kill_acct); -#ifdef CONFIG_NF_CONNTRACK_ZONES -static struct nf_ct_ext_type nf_ct_zone_extend __read_mostly = { - .len = sizeof(struct nf_conntrack_zone), - .align = __alignof__(struct nf_conntrack_zone), - .id = NF_CT_EXT_ZONE, -}; -#endif - #if IS_ENABLED(CONFIG_NF_CT_NETLINK) #include @@ -1532,9 +1515,6 @@ void nf_conntrack_cleanup_end(void) nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size); -#ifdef CONFIG_NF_CONNTRACK_ZONES - nf_ct_extend_unregister(&nf_ct_zone_extend); -#endif nf_conntrack_proto_fini(); nf_conntrack_seqadj_fini(); nf_conntrack_labels_fini(); @@ -1771,11 +1751,6 @@ int nf_conntrack_init_start(void) if (ret < 0) goto err_seqadj; -#ifdef CONFIG_NF_CONNTRACK_ZONES - ret = nf_ct_extend_register(&nf_ct_zone_extend); - if (ret < 0) - goto err_extend; -#endif ret = nf_conntrack_proto_init(); if (ret < 0) goto err_proto; @@ -1791,10 +1766,6 @@ int nf_conntrack_init_start(void) return 0; err_proto: -#ifdef CONFIG_NF_CONNTRACK_ZONES - nf_ct_extend_unregister(&nf_ct_zone_extend); -err_extend: -#endif nf_conntrack_seqadj_fini(); err_seqadj: nf_conntrack_labels_fini(); -- cgit v1.2.3 From 9847371a84b0be330f4bc4aaa98904101ee8573d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 14 Jun 2016 15:14:12 -0700 Subject: netfilter: Allow xt_owner in any user namespace Making this work is a little tricky as it really isn't kosher to change the xt_owner_match_info in a check function. Without changing xt_owner_match_info we need to know the user namespace the uids and gids are specified in. In the common case net->user_ns == current_user_ns(). Verify net->user_ns == current_user_ns() in owner_check so we can later assume it in owner_mt. In owner_check also verify that all of the uids and gids specified are in net->user_ns and that the expected min/max relationship exists between the uids and gids in xt_owner_match_info. In owner_mt get the network namespace from the outgoing socket, as this must be the same network namespace as the netfilter rules, and use that network namespace to find the user namespace the uids and gids in xt_match_owner_info are encoded in. Then convert from their encoded from into the kernel internal format for uids and gids and perform the owner match. Similar to ping_group_range, this code does not try to detect noncontiguous UID/GID ranges. Signed-off-by: "Eric W. Biederman" Signed-off-by: Kevin Cernekee Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_owner.c | 41 +++++++++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 6 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c index 1302b475abcb..a20e731b5b6c 100644 --- a/net/netfilter/xt_owner.c +++ b/net/netfilter/xt_owner.c @@ -21,11 +21,39 @@ static int owner_check(const struct xt_mtchk_param *par) { struct xt_owner_match_info *info = par->matchinfo; + struct net *net = par->net; - /* For now only allow adding matches from the initial user namespace */ + /* Only allow the common case where the userns of the writer + * matches the userns of the network namespace. + */ if ((info->match & (XT_OWNER_UID|XT_OWNER_GID)) && - (current_user_ns() != &init_user_ns)) + (current_user_ns() != net->user_ns)) return -EINVAL; + + /* Ensure the uids are valid */ + if (info->match & XT_OWNER_UID) { + kuid_t uid_min = make_kuid(net->user_ns, info->uid_min); + kuid_t uid_max = make_kuid(net->user_ns, info->uid_max); + + if (!uid_valid(uid_min) || !uid_valid(uid_max) || + (info->uid_max < info->uid_min) || + uid_lt(uid_max, uid_min)) { + return -EINVAL; + } + } + + /* Ensure the gids are valid */ + if (info->match & XT_OWNER_GID) { + kgid_t gid_min = make_kgid(net->user_ns, info->gid_min); + kgid_t gid_max = make_kgid(net->user_ns, info->gid_max); + + if (!gid_valid(gid_min) || !gid_valid(gid_max) || + (info->gid_max < info->gid_min) || + gid_lt(gid_max, gid_min)) { + return -EINVAL; + } + } + return 0; } @@ -35,6 +63,7 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par) const struct xt_owner_match_info *info = par->matchinfo; const struct file *filp; struct sock *sk = skb_to_full_sk(skb); + struct net *net = par->net; if (sk == NULL || sk->sk_socket == NULL) return (info->match ^ info->invert) == 0; @@ -51,8 +80,8 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par) (XT_OWNER_UID | XT_OWNER_GID)) == 0; if (info->match & XT_OWNER_UID) { - kuid_t uid_min = make_kuid(&init_user_ns, info->uid_min); - kuid_t uid_max = make_kuid(&init_user_ns, info->uid_max); + kuid_t uid_min = make_kuid(net->user_ns, info->uid_min); + kuid_t uid_max = make_kuid(net->user_ns, info->uid_max); if ((uid_gte(filp->f_cred->fsuid, uid_min) && uid_lte(filp->f_cred->fsuid, uid_max)) ^ !(info->invert & XT_OWNER_UID)) @@ -60,8 +89,8 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par) } if (info->match & XT_OWNER_GID) { - kgid_t gid_min = make_kgid(&init_user_ns, info->gid_min); - kgid_t gid_max = make_kgid(&init_user_ns, info->gid_max); + kgid_t gid_min = make_kgid(net->user_ns, info->gid_min); + kgid_t gid_max = make_kgid(net->user_ns, info->gid_max); if ((gid_gte(filp->f_cred->fsgid, gid_min) && gid_lte(filp->f_cred->fsgid, gid_max)) ^ !(info->invert & XT_OWNER_GID)) -- cgit v1.2.3 From 7643507fe8b5bd8ab7522f6a81058cc1209d2585 Mon Sep 17 00:00:00 2001 From: Vishwanath Pai Date: Tue, 21 Jun 2016 14:58:46 -0400 Subject: netfilter: xt_NFLOG: nflog-range does not truncate packets li->u.ulog.copy_len is currently ignored by the kernel, we should truncate the packet to either li->u.ulog.copy_len (if set) or copy_range before sending it to userspace. 0 is a valid input for copy_len, so add a new flag to indicate whether this was option was specified by the user or not. Add two flags to indicate whether nflog-size/copy_len was set or not. XT_NFLOG_F_COPY_LEN is for XT_NFLOG and NFLOG_F_COPY_LEN for nfnetlink_log On the userspace side, this was initially represented by the option nflog-range, this will be replaced by --nflog-size now. --nflog-range would still exist but does not do anything. Reported-by: Joe Dollard Reviewed-by: Josh Hunt Signed-off-by: Vishwanath Pai Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_log.h | 7 +++++++ include/uapi/linux/netfilter/xt_NFLOG.h | 6 +++++- net/netfilter/nfnetlink_log.c | 9 ++++++--- net/netfilter/xt_NFLOG.c | 3 +++ 4 files changed, 21 insertions(+), 4 deletions(-) (limited to 'net/netfilter') diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h index 57639fca223a..83d855ba6af1 100644 --- a/include/net/netfilter/nf_log.h +++ b/include/net/netfilter/nf_log.h @@ -12,6 +12,9 @@ #define NF_LOG_UID 0x08 /* Log UID owning local socket */ #define NF_LOG_MASK 0x0f +/* This flag indicates that copy_len field in nf_loginfo is set */ +#define NF_LOG_F_COPY_LEN 0x1 + enum nf_log_type { NF_LOG_TYPE_LOG = 0, NF_LOG_TYPE_ULOG, @@ -22,9 +25,13 @@ struct nf_loginfo { u_int8_t type; union { struct { + /* copy_len will be used iff you set + * NF_LOG_F_COPY_LEN in flags + */ u_int32_t copy_len; u_int16_t group; u_int16_t qthreshold; + u_int16_t flags; } ulog; struct { u_int8_t level; diff --git a/include/uapi/linux/netfilter/xt_NFLOG.h b/include/uapi/linux/netfilter/xt_NFLOG.h index 87b58311ce6b..f33070730fc8 100644 --- a/include/uapi/linux/netfilter/xt_NFLOG.h +++ b/include/uapi/linux/netfilter/xt_NFLOG.h @@ -6,9 +6,13 @@ #define XT_NFLOG_DEFAULT_GROUP 0x1 #define XT_NFLOG_DEFAULT_THRESHOLD 0 -#define XT_NFLOG_MASK 0x0 +#define XT_NFLOG_MASK 0x1 + +/* This flag indicates that 'len' field in xt_nflog_info is set*/ +#define XT_NFLOG_F_COPY_LEN 0x1 struct xt_nflog_info { + /* 'len' will be used iff you set XT_NFLOG_F_COPY_LEN in flags */ __u32 len; __u16 group; __u16 threshold; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 11f81c8385fc..cbcfdfb586a6 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -700,10 +700,13 @@ nfulnl_log_packet(struct net *net, break; case NFULNL_COPY_PACKET: - if (inst->copy_range > skb->len) + data_len = inst->copy_range; + if ((li->u.ulog.flags & NF_LOG_F_COPY_LEN) && + (li->u.ulog.copy_len < data_len)) + data_len = li->u.ulog.copy_len; + + if (data_len > skb->len) data_len = skb->len; - else - data_len = inst->copy_range; size += nla_total_size(data_len); break; diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c index a1fa2c800cb9..018eed7e1ff1 100644 --- a/net/netfilter/xt_NFLOG.c +++ b/net/netfilter/xt_NFLOG.c @@ -33,6 +33,9 @@ nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) li.u.ulog.group = info->group; li.u.ulog.qthreshold = info->threshold; + if (info->flags & XT_NFLOG_F_COPY_LEN) + li.u.ulog.flags |= NF_LOG_F_COPY_LEN; + nfulnl_log_packet(net, par->family, par->hooknum, skb, par->in, par->out, &li, info->prefix); return XT_CONTINUE; -- cgit v1.2.3 From 889f7ee7c6e84251215d43cbc856ea116c72d3f2 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 12 Jun 2016 18:07:07 +0200 Subject: netfilter: nf_tables: add generic macros to check for generation mask Thus, we can reuse these to check the genmask of any object type, not only rules. This is required now that tables, chain and sets will get a generation mask field too in follow up patches. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 24 ++++++++++++++++++++ net/netfilter/nf_tables_api.c | 46 +++++++-------------------------------- 2 files changed, 32 insertions(+), 38 deletions(-) (limited to 'net/netfilter') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 092235458691..d0778cbaf7f1 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -969,6 +969,30 @@ static inline u8 nft_genmask_cur(const struct net *net) #define NFT_GENMASK_ANY ((1 << 0) | (1 << 1)) +/* + * Generic transaction helpers + */ + +/* Check if this object is currently active. */ +#define nft_is_active(__net, __obj) \ + (((__obj)->genmask & nft_genmask_cur(__net)) == 0) + +/* Check if this object is active in the next generation. */ +#define nft_is_active_next(__net, __obj) \ + (((__obj)->genmask & nft_genmask_next(__net)) == 0) + +/* This object becomes active in the next generation. */ +#define nft_activate_next(__net, __obj) \ + (__obj)->genmask = nft_genmask_cur(__net) + +/* This object becomes inactive in the next generation. */ +#define nft_deactivate_next(__net, __obj) \ + (__obj)->genmask = nft_genmask_next(__net) + +/* After committing the ruleset, clear the stale generation bit. */ +#define nft_clear(__net, __obj) \ + (__obj)->genmask &= ~nft_genmask_next(__net) + /* * Set element transaction helpers */ diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 4d292b933b5c..d9f0f0797dec 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -234,42 +234,12 @@ static int nft_delchain(struct nft_ctx *ctx) return err; } -static inline bool -nft_rule_is_active(struct net *net, const struct nft_rule *rule) -{ - return (rule->genmask & nft_genmask_cur(net)) == 0; -} - -static inline int -nft_rule_is_active_next(struct net *net, const struct nft_rule *rule) -{ - return (rule->genmask & nft_genmask_next(net)) == 0; -} - -static inline void -nft_rule_activate_next(struct net *net, struct nft_rule *rule) -{ - /* Now inactive, will be active in the future */ - rule->genmask = nft_genmask_cur(net); -} - -static inline void -nft_rule_deactivate_next(struct net *net, struct nft_rule *rule) -{ - rule->genmask = nft_genmask_next(net); -} - -static inline void nft_rule_clear(struct net *net, struct nft_rule *rule) -{ - rule->genmask &= ~nft_genmask_next(net); -} - static int nf_tables_delrule_deactivate(struct nft_ctx *ctx, struct nft_rule *rule) { /* You cannot delete the same rule twice */ - if (nft_rule_is_active_next(ctx->net, rule)) { - nft_rule_deactivate_next(ctx->net, rule); + if (nft_is_active_next(ctx->net, rule)) { + nft_deactivate_next(ctx->net, rule); ctx->chain->use--; return 0; } @@ -1898,7 +1868,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb, list_for_each_entry_rcu(table, &afi->tables, list) { list_for_each_entry_rcu(chain, &table->chains, list) { list_for_each_entry_rcu(rule, &chain->rules, list) { - if (!nft_rule_is_active(net, rule)) + if (!nft_is_active(net, rule)) goto cont; if (idx < s_idx) goto cont; @@ -2102,7 +2072,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, if (rule == NULL) goto err1; - nft_rule_activate_next(net, rule); + nft_activate_next(net, rule); rule->handle = handle; rule->dlen = size; @@ -2124,14 +2094,14 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, } if (nlh->nlmsg_flags & NLM_F_REPLACE) { - if (nft_rule_is_active_next(net, old_rule)) { + if (nft_is_active_next(net, old_rule)) { trans = nft_trans_rule_add(&ctx, NFT_MSG_DELRULE, old_rule); if (trans == NULL) { err = -ENOMEM; goto err2; } - nft_rule_deactivate_next(net, old_rule); + nft_deactivate_next(net, old_rule); chain->use--; list_add_tail_rcu(&rule->list, &old_rule->list); } else { @@ -3980,7 +3950,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) trans->ctx.afi->nops); break; case NFT_MSG_NEWRULE: - nft_rule_clear(trans->ctx.net, nft_trans_rule(trans)); + nft_clear(trans->ctx.net, nft_trans_rule(trans)); nf_tables_rule_notify(&trans->ctx, nft_trans_rule(trans), NFT_MSG_NEWRULE); @@ -4116,7 +4086,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) break; case NFT_MSG_DELRULE: trans->ctx.chain->use++; - nft_rule_clear(trans->ctx.net, nft_trans_rule(trans)); + nft_clear(trans->ctx.net, nft_trans_rule(trans)); nft_trans_destroy(trans); break; case NFT_MSG_NEWSET: -- cgit v1.2.3 From f2a6d766765d2794e26e25655d4ffcfe29c3ec2f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 14 Jun 2016 17:29:18 +0200 Subject: netfilter: nf_tables: add generation mask to tables This patch addresses two problems: 1) The netlink dump is inconsistent when interfering with an ongoing transaction update for several reasons: 1.a) We don't honor the internal NFT_TABLE_INACTIVE flag, and we should be skipping these inactive objects in the dump. 1.b) We perform speculative deletion during the preparation phase, that may result in skipping active objects. 1.c) The listing order changes, which generates noise when tracking incremental ruleset update via tools like git or our own testsuite. 2) We don't allow to add and to update the object in the same batch, eg. add table x; add table x { flags dormant\; }. In order to resolve these problems: 1) If the user requests a deletion, the object becomes inactive in the next generation. Then, ignore objects that scheduled to be deleted from the lookup path, as they will be effectively removed in the next generation. 2) From the get/dump path, if the object is not currently active, we skip it. 3) Support 'add X -> update X' sequence from a transaction. After this update, we obtain a consistent list as long as we stay in the same generation. The userspace side can detect interferences through the generation counter so it can restart the dumping. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 6 ++- net/netfilter/nf_tables_api.c | 101 +++++++++++++++++++++----------------- 2 files changed, 62 insertions(+), 45 deletions(-) (limited to 'net/netfilter') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index d0778cbaf7f1..05c9a64b39aa 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -838,6 +838,7 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv); * @hgenerator: handle generator state * @use: number of chain references to this table * @flags: table flag (see enum nft_table_flags) + * @genmask: generation mask * @name: name of the table */ struct nft_table { @@ -846,7 +847,8 @@ struct nft_table { struct list_head sets; u64 hgenerator; u32 use; - u16 flags; + u16 flags:14, + genmask:2; char name[NFT_TABLE_MAXNAMELEN]; }; @@ -992,6 +994,8 @@ static inline u8 nft_genmask_cur(const struct net *net) /* After committing the ruleset, clear the stale generation bit. */ #define nft_clear(__net, __obj) \ (__obj)->genmask &= ~nft_genmask_next(__net) +#define nft_active_genmask(__obj, __genmask) \ + !((__obj)->genmask & __genmask) /* * Set element transaction helpers diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d9f0f0797dec..a4a77d60e631 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -175,9 +175,6 @@ static void nf_tables_unregister_hooks(const struct nft_table *table, nft_unregister_basechain(nft_base_chain(chain), hook_nops); } -/* Internal table flags */ -#define NFT_TABLE_INACTIVE (1 << 15) - static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type) { struct nft_trans *trans; @@ -187,7 +184,7 @@ static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type) return -ENOMEM; if (msg_type == NFT_MSG_NEWTABLE) - ctx->table->flags |= NFT_TABLE_INACTIVE; + nft_activate_next(ctx->net, ctx->table); list_add_tail(&trans->list, &ctx->net->nft.commit_list); return 0; @@ -201,7 +198,7 @@ static int nft_deltable(struct nft_ctx *ctx) if (err < 0) return err; - list_del_rcu(&ctx->table->list); + nft_deactivate_next(ctx->net, ctx->table); return err; } @@ -334,26 +331,29 @@ static int nft_delset(struct nft_ctx *ctx, struct nft_set *set) */ static struct nft_table *nft_table_lookup(const struct nft_af_info *afi, - const struct nlattr *nla) + const struct nlattr *nla, + u8 genmask) { struct nft_table *table; list_for_each_entry(table, &afi->tables, list) { - if (!nla_strcmp(nla, table->name)) + if (!nla_strcmp(nla, table->name) && + nft_active_genmask(table, genmask)) return table; } return NULL; } static struct nft_table *nf_tables_table_lookup(const struct nft_af_info *afi, - const struct nlattr *nla) + const struct nlattr *nla, + u8 genmask) { struct nft_table *table; if (nla == NULL) return ERR_PTR(-EINVAL); - table = nft_table_lookup(afi, nla); + table = nft_table_lookup(afi, nla, genmask); if (table != NULL) return table; @@ -494,6 +494,8 @@ static int nf_tables_dump_tables(struct sk_buff *skb, if (idx > s_idx) memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0])); + if (!nft_is_active(net, table)) + continue; if (nf_tables_fill_table_info(skb, net, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, @@ -518,6 +520,7 @@ static int nf_tables_gettable(struct net *net, struct sock *nlsk, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_cur(net); const struct nft_af_info *afi; const struct nft_table *table; struct sk_buff *skb2; @@ -535,11 +538,9 @@ static int nf_tables_gettable(struct net *net, struct sock *nlsk, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]); + table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], genmask); if (IS_ERR(table)) return PTR_ERR(table); - if (table->flags & NFT_TABLE_INACTIVE) - return -ENOENT; skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb2) @@ -648,6 +649,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_next(net); const struct nlattr *name; struct nft_af_info *afi; struct nft_table *table; @@ -661,7 +663,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, return PTR_ERR(afi); name = nla[NFTA_TABLE_NAME]; - table = nf_tables_table_lookup(afi, name); + table = nf_tables_table_lookup(afi, name, genmask); if (IS_ERR(table)) { if (PTR_ERR(table) != -ENOENT) return PTR_ERR(table); @@ -669,8 +671,6 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, } if (table != NULL) { - if (table->flags & NFT_TABLE_INACTIVE) - return -ENOENT; if (nlh->nlmsg_flags & NLM_F_EXCL) return -EEXIST; if (nlh->nlmsg_flags & NLM_F_REPLACE) @@ -765,6 +765,9 @@ static int nft_flush(struct nft_ctx *ctx, int family) ctx->afi = afi; list_for_each_entry_safe(table, nt, &afi->tables, list) { + if (!nft_is_active_next(ctx->net, table)) + continue; + if (nla[NFTA_TABLE_NAME] && nla_strcmp(nla[NFTA_TABLE_NAME], table->name) != 0) continue; @@ -785,6 +788,7 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_next(net); struct nft_af_info *afi; struct nft_table *table; int family = nfmsg->nfgen_family; @@ -798,7 +802,7 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]); + table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], genmask); if (IS_ERR(table)) return PTR_ERR(table); @@ -1074,6 +1078,7 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_cur(net); const struct nft_af_info *afi; const struct nft_table *table; const struct nft_chain *chain; @@ -1092,11 +1097,9 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]); + table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask); if (IS_ERR(table)) return PTR_ERR(table); - if (table->flags & NFT_TABLE_INACTIVE) - return -ENOENT; chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]); if (IS_ERR(chain)) @@ -1201,6 +1204,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, struct nft_chain *chain; struct nft_base_chain *basechain = NULL; struct nlattr *ha[NFTA_HOOK_MAX + 1]; + u8 genmask = nft_genmask_next(net); int family = nfmsg->nfgen_family; struct net_device *dev = NULL; u8 policy = NF_ACCEPT; @@ -1217,7 +1221,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]); + table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask); if (IS_ERR(table)) return PTR_ERR(table); @@ -1449,6 +1453,7 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_next(net); struct nft_af_info *afi; struct nft_table *table; struct nft_chain *chain; @@ -1459,7 +1464,7 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]); + table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask); if (IS_ERR(table)) return PTR_ERR(table); @@ -1901,6 +1906,7 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_cur(net); const struct nft_af_info *afi; const struct nft_table *table; const struct nft_chain *chain; @@ -1920,11 +1926,9 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]); + table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], genmask); if (IS_ERR(table)) return PTR_ERR(table); - if (table->flags & NFT_TABLE_INACTIVE) - return -ENOENT; chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); if (IS_ERR(chain)) @@ -1979,6 +1983,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_next(net); struct nft_af_info *afi; struct nft_table *table; struct nft_chain *chain; @@ -1999,7 +2004,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]); + table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], genmask); if (IS_ERR(table)) return PTR_ERR(table); @@ -2144,6 +2149,7 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_next(net); struct nft_af_info *afi; struct nft_table *table; struct nft_chain *chain = NULL; @@ -2155,7 +2161,7 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]); + table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], genmask); if (IS_ERR(table)) return PTR_ERR(table); @@ -2309,7 +2315,8 @@ static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = { static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net, const struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + u8 genmask) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); struct nft_af_info *afi = NULL; @@ -2325,7 +2332,8 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net, if (afi == NULL) return -EAFNOSUPPORT; - table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]); + table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE], + genmask); if (IS_ERR(table)) return PTR_ERR(table); } @@ -2586,6 +2594,7 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { + u8 genmask = nft_genmask_cur(net); const struct nft_set *set; struct nft_ctx ctx; struct sk_buff *skb2; @@ -2593,7 +2602,7 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk, int err; /* Verify existence before starting dump */ - err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla); + err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla, genmask); if (err < 0) return err; @@ -2661,6 +2670,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_next(net); const struct nft_set_ops *ops; struct nft_af_info *afi; struct nft_table *table; @@ -2758,7 +2768,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]); + table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE], genmask); if (IS_ERR(table)) return PTR_ERR(table); @@ -2863,6 +2873,7 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_next(net); struct nft_set *set; struct nft_ctx ctx; int err; @@ -2872,7 +2883,7 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk, if (nla[NFTA_SET_TABLE] == NULL) return -EINVAL; - err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla); + err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla, genmask); if (err < 0) return err; @@ -3001,7 +3012,8 @@ static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, struct net *net, const struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + u8 genmask) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); struct nft_af_info *afi; @@ -3011,7 +3023,8 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, struct net *net, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE]); + table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE], + genmask); if (IS_ERR(table)) return PTR_ERR(table); @@ -3108,6 +3121,7 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx, static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); + u8 genmask = nft_genmask_cur(net); const struct nft_set *set; struct nft_set_dump_args args; struct nft_ctx ctx; @@ -3124,11 +3138,9 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) return err; err = nft_ctx_init_from_elemattr(&ctx, net, cb->skb, cb->nlh, - (void *)nla); + (void *)nla, genmask); if (err < 0) return err; - if (ctx.table->flags & NFT_TABLE_INACTIVE) - return -ENOENT; set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); if (IS_ERR(set)) @@ -3187,15 +3199,14 @@ static int nf_tables_getsetelem(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { + u8 genmask = nft_genmask_cur(net); const struct nft_set *set; struct nft_ctx ctx; int err; - err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla); + err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask); if (err < 0) return err; - if (ctx.table->flags & NFT_TABLE_INACTIVE) - return -ENOENT; set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); if (IS_ERR(set)) @@ -3519,6 +3530,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { + u8 genmask = nft_genmask_next(net); const struct nlattr *attr; struct nft_set *set; struct nft_ctx ctx; @@ -3527,7 +3539,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) return -EINVAL; - err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla); + err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask); if (err < 0) return err; @@ -3641,6 +3653,7 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { + u8 genmask = nft_genmask_next(net); const struct nlattr *attr; struct nft_set *set; struct nft_ctx ctx; @@ -3649,7 +3662,7 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) return -EINVAL; - err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla); + err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask); if (err < 0) return err; @@ -3926,12 +3939,13 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; } } else { - trans->ctx.table->flags &= ~NFT_TABLE_INACTIVE; + nft_clear(net, trans->ctx.table); } nf_tables_table_notify(&trans->ctx, NFT_MSG_NEWTABLE); nft_trans_destroy(trans); break; case NFT_MSG_DELTABLE: + list_del_rcu(&trans->ctx.table->list); nf_tables_table_notify(&trans->ctx, NFT_MSG_DELTABLE); break; case NFT_MSG_NEWCHAIN: @@ -4057,8 +4071,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) } break; case NFT_MSG_DELTABLE: - list_add_tail_rcu(&trans->ctx.table->list, - &trans->ctx.afi->tables); + nft_clear(trans->ctx.net, trans->ctx.table); nft_trans_destroy(trans); break; case NFT_MSG_NEWCHAIN: -- cgit v1.2.3 From 664b0f8cd8c66d02d14168ee7ac6a957cc88177f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 12 Jun 2016 19:21:31 +0200 Subject: netfilter: nf_tables: add generation mask to chains Similar to ("netfilter: nf_tables: add generation mask to tables"). Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 4 +- net/netfilter/nf_tables_api.c | 89 +++++++++++++++++++++++++-------------- 2 files changed, 60 insertions(+), 33 deletions(-) (limited to 'net/netfilter') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 05c9a64b39aa..b023e287ea92 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -732,7 +732,6 @@ static inline struct nft_userdata *nft_userdata(const struct nft_rule *rule) enum nft_chain_flags { NFT_BASE_CHAIN = 0x1, - NFT_CHAIN_INACTIVE = 0x2, }; /** @@ -754,7 +753,8 @@ struct nft_chain { u64 handle; u32 use; u16 level; - u8 flags; + u8 flags:6, + genmask:2; char name[NFT_CHAIN_MAXNAMELEN]; }; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a4a77d60e631..cae88f8a990e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -211,7 +211,7 @@ static int nft_trans_chain_add(struct nft_ctx *ctx, int msg_type) return -ENOMEM; if (msg_type == NFT_MSG_NEWCHAIN) - ctx->chain->flags |= NFT_CHAIN_INACTIVE; + nft_activate_next(ctx->net, ctx->chain); list_add_tail(&trans->list, &ctx->net->nft.commit_list); return 0; @@ -226,7 +226,7 @@ static int nft_delchain(struct nft_ctx *ctx) return err; ctx->table->use--; - list_del_rcu(&ctx->chain->list); + nft_deactivate_next(ctx->net, ctx->chain); return err; } @@ -559,13 +559,16 @@ err: return err; } -static int nf_tables_table_enable(const struct nft_af_info *afi, +static int nf_tables_table_enable(struct net *net, + const struct nft_af_info *afi, struct nft_table *table) { struct nft_chain *chain; int err, i = 0; list_for_each_entry(chain, &table->chains, list) { + if (!nft_is_active_next(net, chain)) + continue; if (!(chain->flags & NFT_BASE_CHAIN)) continue; @@ -578,6 +581,8 @@ static int nf_tables_table_enable(const struct nft_af_info *afi, return 0; err: list_for_each_entry(chain, &table->chains, list) { + if (!nft_is_active_next(net, chain)) + continue; if (!(chain->flags & NFT_BASE_CHAIN)) continue; @@ -589,12 +594,15 @@ err: return err; } -static void nf_tables_table_disable(const struct nft_af_info *afi, +static void nf_tables_table_disable(struct net *net, + const struct nft_af_info *afi, struct nft_table *table) { struct nft_chain *chain; list_for_each_entry(chain, &table->chains, list) { + if (!nft_is_active_next(net, chain)) + continue; if (chain->flags & NFT_BASE_CHAIN) nft_unregister_basechain(nft_base_chain(chain), afi->nops); @@ -627,7 +635,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx) nft_trans_table_enable(trans) = false; } else if (!(flags & NFT_TABLE_F_DORMANT) && ctx->table->flags & NFT_TABLE_F_DORMANT) { - ret = nf_tables_table_enable(ctx->afi, ctx->table); + ret = nf_tables_table_enable(ctx->net, ctx->afi, ctx->table); if (ret >= 0) { ctx->table->flags &= ~NFT_TABLE_F_DORMANT; nft_trans_table_enable(trans) = true; @@ -722,6 +730,9 @@ static int nft_flush_table(struct nft_ctx *ctx) struct nft_set *set, *ns; list_for_each_entry(chain, &ctx->table->chains, list) { + if (!nft_is_active_next(ctx->net, chain)) + continue; + ctx->chain = chain; err = nft_delrule_by_chain(ctx); @@ -740,6 +751,9 @@ static int nft_flush_table(struct nft_ctx *ctx) } list_for_each_entry_safe(chain, nc, &ctx->table->chains, list) { + if (!nft_is_active_next(ctx->net, chain)) + continue; + ctx->chain = chain; err = nft_delchain(ctx); @@ -849,12 +863,14 @@ EXPORT_SYMBOL_GPL(nft_unregister_chain_type); */ static struct nft_chain * -nf_tables_chain_lookup_byhandle(const struct nft_table *table, u64 handle) +nf_tables_chain_lookup_byhandle(const struct nft_table *table, u64 handle, + u8 genmask) { struct nft_chain *chain; list_for_each_entry(chain, &table->chains, list) { - if (chain->handle == handle) + if (chain->handle == handle && + nft_active_genmask(chain, genmask)) return chain; } @@ -862,7 +878,8 @@ nf_tables_chain_lookup_byhandle(const struct nft_table *table, u64 handle) } static struct nft_chain *nf_tables_chain_lookup(const struct nft_table *table, - const struct nlattr *nla) + const struct nlattr *nla, + u8 genmask) { struct nft_chain *chain; @@ -870,7 +887,8 @@ static struct nft_chain *nf_tables_chain_lookup(const struct nft_table *table, return ERR_PTR(-EINVAL); list_for_each_entry(chain, &table->chains, list) { - if (!nla_strcmp(nla, chain->name)) + if (!nla_strcmp(nla, chain->name) && + nft_active_genmask(chain, genmask)) return chain; } @@ -1053,6 +1071,8 @@ static int nf_tables_dump_chains(struct sk_buff *skb, if (idx > s_idx) memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0])); + if (!nft_is_active(net, chain)) + continue; if (nf_tables_fill_chain_info(skb, net, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, @@ -1101,11 +1121,9 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk, if (IS_ERR(table)) return PTR_ERR(table); - chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]); + chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask); if (IS_ERR(chain)) return PTR_ERR(chain); - if (chain->flags & NFT_CHAIN_INACTIVE) - return -ENOENT; skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb2) @@ -1230,11 +1248,11 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, if (nla[NFTA_CHAIN_HANDLE]) { handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE])); - chain = nf_tables_chain_lookup_byhandle(table, handle); + chain = nf_tables_chain_lookup_byhandle(table, handle, genmask); if (IS_ERR(chain)) return PTR_ERR(chain); } else { - chain = nf_tables_chain_lookup(table, name); + chain = nf_tables_chain_lookup(table, name, genmask); if (IS_ERR(chain)) { if (PTR_ERR(chain) != -ENOENT) return PTR_ERR(chain); @@ -1265,16 +1283,20 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, struct nft_stats *stats = NULL; struct nft_trans *trans; - if (chain->flags & NFT_CHAIN_INACTIVE) - return -ENOENT; if (nlh->nlmsg_flags & NLM_F_EXCL) return -EEXIST; if (nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; - if (nla[NFTA_CHAIN_HANDLE] && name && - !IS_ERR(nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]))) - return -EEXIST; + if (nla[NFTA_CHAIN_HANDLE] && name) { + struct nft_chain *chain2; + + chain2 = nf_tables_chain_lookup(table, + nla[NFTA_CHAIN_NAME], + genmask); + if (IS_ERR(chain2)) + return PTR_ERR(chain2); + } if (nla[NFTA_CHAIN_COUNTERS]) { if (!(chain->flags & NFT_BASE_CHAIN)) @@ -1468,7 +1490,7 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk, if (IS_ERR(table)) return PTR_ERR(table); - chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]); + chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask); if (IS_ERR(chain)) return PTR_ERR(chain); if (chain->use > 0) @@ -1930,11 +1952,9 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk, if (IS_ERR(table)) return PTR_ERR(table); - chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); + chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask); if (IS_ERR(chain)) return PTR_ERR(chain); - if (chain->flags & NFT_CHAIN_INACTIVE) - return -ENOENT; rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]); if (IS_ERR(rule)) @@ -2008,7 +2028,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, if (IS_ERR(table)) return PTR_ERR(table); - chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); + chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask); if (IS_ERR(chain)) return PTR_ERR(chain); @@ -2166,7 +2186,8 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk, return PTR_ERR(table); if (nla[NFTA_RULE_CHAIN]) { - chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); + chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN], + genmask); if (IS_ERR(chain)) return PTR_ERR(chain); } @@ -2186,6 +2207,9 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk, } } else { list_for_each_entry(chain, &table->chains, list) { + if (!nft_is_active_next(net, chain)) + continue; + ctx.chain = chain; err = nft_delrule_by_chain(&ctx); if (err < 0) @@ -3934,7 +3958,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) case NFT_MSG_NEWTABLE: if (nft_trans_table_update(trans)) { if (!nft_trans_table_enable(trans)) { - nf_tables_table_disable(trans->ctx.afi, + nf_tables_table_disable(net, + trans->ctx.afi, trans->ctx.table); trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; } @@ -3952,12 +3977,13 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) if (nft_trans_chain_update(trans)) nft_chain_commit_update(trans); else - trans->ctx.chain->flags &= ~NFT_CHAIN_INACTIVE; + nft_clear(net, trans->ctx.chain); nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN); nft_trans_destroy(trans); break; case NFT_MSG_DELCHAIN: + list_del_rcu(&trans->ctx.chain->list); nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN); nf_tables_unregister_hooks(trans->ctx.table, trans->ctx.chain, @@ -4061,7 +4087,8 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) case NFT_MSG_NEWTABLE: if (nft_trans_table_update(trans)) { if (nft_trans_table_enable(trans)) { - nf_tables_table_disable(trans->ctx.afi, + nf_tables_table_disable(net, + trans->ctx.afi, trans->ctx.table); trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; } @@ -4089,8 +4116,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) break; case NFT_MSG_DELCHAIN: trans->ctx.table->use++; - list_add_tail_rcu(&trans->ctx.chain->list, - &trans->ctx.table->chains); + nft_clear(trans->ctx.net, trans->ctx.chain); nft_trans_destroy(trans); break; case NFT_MSG_NEWRULE: @@ -4413,6 +4439,7 @@ static const struct nla_policy nft_verdict_policy[NFTA_VERDICT_MAX + 1] = { static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, struct nft_data_desc *desc, const struct nlattr *nla) { + u8 genmask = nft_genmask_next(ctx->net); struct nlattr *tb[NFTA_VERDICT_MAX + 1]; struct nft_chain *chain; int err; @@ -4445,7 +4472,7 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, if (!tb[NFTA_VERDICT_CHAIN]) return -EINVAL; chain = nf_tables_chain_lookup(ctx->table, - tb[NFTA_VERDICT_CHAIN]); + tb[NFTA_VERDICT_CHAIN], genmask); if (IS_ERR(chain)) return PTR_ERR(chain); if (chain->flags & NFT_BASE_CHAIN) -- cgit v1.2.3 From 37a9cc52552579f22e18cca401cfc4351b6cbc72 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 12 Jun 2016 22:52:45 +0200 Subject: netfilter: nf_tables: add generation mask to sets Similar to ("netfilter: nf_tables: add generation mask to tables"). Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 8 +++-- net/netfilter/nf_tables_api.c | 68 +++++++++++++++++++++++---------------- net/netfilter/nft_dynset.c | 7 ++-- net/netfilter/nft_lookup.c | 6 ++-- 4 files changed, 54 insertions(+), 35 deletions(-) (limited to 'net/netfilter') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index b023e287ea92..07a5ba47cbda 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -296,6 +296,7 @@ void nft_unregister_set(struct nft_set_ops *ops); * @ops: set ops * @pnet: network namespace * @flags: set flags + * @genmask: generation mask * @klen: key length * @dlen: data length * @data: private set data @@ -317,7 +318,8 @@ struct nft_set { /* runtime data below here */ const struct nft_set_ops *ops ____cacheline_aligned; possible_net_t pnet; - u16 flags; + u16 flags:14, + genmask:2; u8 klen; u8 dlen; unsigned char data[] @@ -335,9 +337,9 @@ static inline struct nft_set *nft_set_container_of(const void *priv) } struct nft_set *nf_tables_set_lookup(const struct nft_table *table, - const struct nlattr *nla); + const struct nlattr *nla, u8 genmask); struct nft_set *nf_tables_set_lookup_byid(const struct net *net, - const struct nlattr *nla); + const struct nlattr *nla, u8 genmask); static inline unsigned long nft_set_gc_interval(const struct nft_set *set) { diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index cae88f8a990e..3316bce0a878 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -289,9 +289,6 @@ static int nft_delrule_by_chain(struct nft_ctx *ctx) return 0; } -/* Internal set flag */ -#define NFT_SET_INACTIVE (1 << 15) - static int nft_trans_set_add(struct nft_ctx *ctx, int msg_type, struct nft_set *set) { @@ -304,7 +301,7 @@ static int nft_trans_set_add(struct nft_ctx *ctx, int msg_type, if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] != NULL) { nft_trans_set_id(trans) = ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID])); - set->flags |= NFT_SET_INACTIVE; + nft_activate_next(ctx->net, set); } nft_trans_set(trans) = set; list_add_tail(&trans->list, &ctx->net->nft.commit_list); @@ -320,7 +317,7 @@ static int nft_delset(struct nft_ctx *ctx, struct nft_set *set) if (err < 0) return err; - list_del_rcu(&set->list); + nft_deactivate_next(ctx->net, set); ctx->table->use--; return err; @@ -741,6 +738,9 @@ static int nft_flush_table(struct nft_ctx *ctx) } list_for_each_entry_safe(set, ns, &ctx->table->sets, list) { + if (!nft_is_active_next(ctx->net, set)) + continue; + if (set->flags & NFT_SET_ANONYMOUS && !list_empty(&set->bindings)) continue; @@ -2367,7 +2367,7 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net, } struct nft_set *nf_tables_set_lookup(const struct nft_table *table, - const struct nlattr *nla) + const struct nlattr *nla, u8 genmask) { struct nft_set *set; @@ -2375,22 +2375,27 @@ struct nft_set *nf_tables_set_lookup(const struct nft_table *table, return ERR_PTR(-EINVAL); list_for_each_entry(set, &table->sets, list) { - if (!nla_strcmp(nla, set->name)) + if (!nla_strcmp(nla, set->name) && + nft_active_genmask(set, genmask)) return set; } return ERR_PTR(-ENOENT); } struct nft_set *nf_tables_set_lookup_byid(const struct net *net, - const struct nlattr *nla) + const struct nlattr *nla, + u8 genmask) { struct nft_trans *trans; u32 id = ntohl(nla_get_be32(nla)); list_for_each_entry(trans, &net->nft.commit_list, list) { + struct nft_set *set = nft_trans_set(trans); + if (trans->msg_type == NFT_MSG_NEWSET && - id == nft_trans_set_id(trans)) - return nft_trans_set(trans); + id == nft_trans_set_id(trans) && + nft_active_genmask(set, genmask)) + return set; } return ERR_PTR(-ENOENT); } @@ -2415,6 +2420,8 @@ cont: list_for_each_entry(i, &ctx->table->sets, list) { int tmp; + if (!nft_is_active_next(ctx->net, set)) + continue; if (!sscanf(i->name, name, &tmp)) continue; if (tmp < min || tmp >= min + BITS_PER_BYTE * PAGE_SIZE) @@ -2434,6 +2441,8 @@ cont: snprintf(set->name, sizeof(set->name), name, min + n); list_for_each_entry(i, &ctx->table->sets, list) { + if (!nft_is_active_next(ctx->net, i)) + continue; if (!strcmp(set->name, i->name)) return -ENFILE; } @@ -2582,6 +2591,8 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb) list_for_each_entry_rcu(set, &table->sets, list) { if (idx < s_idx) goto cont; + if (!nft_is_active(net, set)) + goto cont; ctx_set = *ctx; ctx_set.table = table; @@ -2651,11 +2662,9 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk, if (nfmsg->nfgen_family == NFPROTO_UNSPEC) return -EAFNOSUPPORT; - set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]); + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask); if (IS_ERR(set)) return PTR_ERR(set); - if (set->flags & NFT_SET_INACTIVE) - return -ENOENT; skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (skb2 == NULL) @@ -2798,7 +2807,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla); - set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME]); + set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME], genmask); if (IS_ERR(set)) { if (PTR_ERR(set) != -ENOENT) return PTR_ERR(set); @@ -2911,7 +2920,7 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk, if (err < 0) return err; - set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]); + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask); if (IS_ERR(set)) return PTR_ERR(set); if (!list_empty(&set->bindings)) @@ -2980,7 +2989,7 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, list_del_rcu(&binding->list); if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS && - !(set->flags & NFT_SET_INACTIVE)) + nft_is_active(ctx->net, set)) nf_tables_set_destroy(ctx, set); } @@ -3166,11 +3175,10 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) if (err < 0) return err; - set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], + genmask); if (IS_ERR(set)) return PTR_ERR(set); - if (set->flags & NFT_SET_INACTIVE) - return -ENOENT; event = NFT_MSG_NEWSETELEM; event |= NFNL_SUBSYS_NFTABLES << 8; @@ -3232,11 +3240,10 @@ static int nf_tables_getsetelem(struct net *net, struct sock *nlsk, if (err < 0) return err; - set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], + genmask); if (IS_ERR(set)) return PTR_ERR(set); - if (set->flags & NFT_SET_INACTIVE) - return -ENOENT; if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { @@ -3567,11 +3574,13 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, if (err < 0) return err; - set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], + genmask); if (IS_ERR(set)) { if (nla[NFTA_SET_ELEM_LIST_SET_ID]) { set = nf_tables_set_lookup_byid(net, - nla[NFTA_SET_ELEM_LIST_SET_ID]); + nla[NFTA_SET_ELEM_LIST_SET_ID], + genmask); } if (IS_ERR(set)) return PTR_ERR(set); @@ -3690,7 +3699,8 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, if (err < 0) return err; - set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], + genmask); if (IS_ERR(set)) return PTR_ERR(set); if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) @@ -4003,7 +4013,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) NFT_MSG_DELRULE); break; case NFT_MSG_NEWSET: - nft_trans_set(trans)->flags &= ~NFT_SET_INACTIVE; + nft_clear(net, nft_trans_set(trans)); /* This avoids hitting -EBUSY when deleting the table * from the transaction. */ @@ -4016,6 +4026,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nft_trans_destroy(trans); break; case NFT_MSG_DELSET: + list_del_rcu(&nft_trans_set(trans)->list); nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), NFT_MSG_DELSET, GFP_KERNEL); break; @@ -4134,8 +4145,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) break; case NFT_MSG_DELSET: trans->ctx.table->use++; - list_add_tail_rcu(&nft_trans_set(trans)->list, - &trans->ctx.table->sets); + nft_clear(trans->ctx.net, nft_trans_set(trans)); nft_trans_destroy(trans); break; case NFT_MSG_NEWSETELEM: @@ -4282,6 +4292,8 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, } list_for_each_entry(set, &ctx->table->sets, list) { + if (!nft_is_active_next(ctx->net, set)) + continue; if (!(set->flags & NFT_SET_MAP) || set->dtype != NFT_DATA_VERDICT) continue; diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 78d4914fb39c..0af26699bf04 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -103,6 +103,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_dynset *priv = nft_expr_priv(expr); + u8 genmask = nft_genmask_next(ctx->net); struct nft_set *set; u64 timeout; int err; @@ -112,11 +113,13 @@ static int nft_dynset_init(const struct nft_ctx *ctx, tb[NFTA_DYNSET_SREG_KEY] == NULL) return -EINVAL; - set = nf_tables_set_lookup(ctx->table, tb[NFTA_DYNSET_SET_NAME]); + set = nf_tables_set_lookup(ctx->table, tb[NFTA_DYNSET_SET_NAME], + genmask); if (IS_ERR(set)) { if (tb[NFTA_DYNSET_SET_ID]) set = nf_tables_set_lookup_byid(ctx->net, - tb[NFTA_DYNSET_SET_ID]); + tb[NFTA_DYNSET_SET_ID], + genmask); if (IS_ERR(set)) return PTR_ERR(set); } diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index b3c31ef8015d..8a102cf855d0 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -54,6 +54,7 @@ static int nft_lookup_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_lookup *priv = nft_expr_priv(expr); + u8 genmask = nft_genmask_next(ctx->net); struct nft_set *set; int err; @@ -61,11 +62,12 @@ static int nft_lookup_init(const struct nft_ctx *ctx, tb[NFTA_LOOKUP_SREG] == NULL) return -EINVAL; - set = nf_tables_set_lookup(ctx->table, tb[NFTA_LOOKUP_SET]); + set = nf_tables_set_lookup(ctx->table, tb[NFTA_LOOKUP_SET], genmask); if (IS_ERR(set)) { if (tb[NFTA_LOOKUP_SET_ID]) { set = nf_tables_set_lookup_byid(ctx->net, - tb[NFTA_LOOKUP_SET_ID]); + tb[NFTA_LOOKUP_SET_ID], + genmask); } if (IS_ERR(set)) return PTR_ERR(set); -- cgit v1.2.3 From 4e5001651f5e488eac378ebabc5bde2a8f1ea861 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 21 Jun 2016 00:12:15 +0200 Subject: netfilter: nft_rbtree: check for next generation when deactivating elements set->ops->deactivate() is invoked from nft_del_setelem() that happens from the transaction path, so we have to check if the object is active in the next generation, not the current. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_rbtree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c index f762094af7c1..86fbe5e68d28 100644 --- a/net/netfilter/nft_rbtree.c +++ b/net/netfilter/nft_rbtree.c @@ -170,7 +170,7 @@ static void *nft_rbtree_deactivate(const struct nft_set *set, const struct nft_rbtree *priv = nft_set_priv(set); const struct rb_node *parent = priv->root.rb_node; struct nft_rbtree_elem *rbe, *this = elem->priv; - u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); + u8 genmask = nft_genmask_next(read_pnet(&set->pnet)); int d; while (parent != NULL) { -- cgit v1.2.3 From 8eee54be73f4b938dbf48e95c0dbecb5f19b08ee Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 21 Jun 2016 00:12:26 +0200 Subject: netfilter: nft_hash: support deletion of inactive elements New elements are inactive in the preparation phase, and its NFT_SET_ELEM_BUSY_MASK flag is set on. This busy flag doesn't allow us to delete it from the same transaction, following a sequence like: begin transaction add element X delete element X end transaction This sequence is valid and may be triggered by robots. To resolve this problem, allow deactivating elements that are active in the current generation (ie. those that has been just added in this batch). Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_hash.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 6fa016564f90..d3a507d3f192 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -153,9 +153,10 @@ static void *nft_hash_deactivate(const struct nft_set *set, const struct nft_set_elem *elem) { struct nft_hash *priv = nft_set_priv(set); + struct net *net = read_pnet(&set->pnet); struct nft_hash_elem *he; struct nft_hash_cmp_arg arg = { - .genmask = nft_genmask_next(read_pnet(&set->pnet)), + .genmask = nft_genmask_next(net), .set = set, .key = elem->key.val.data, }; @@ -163,7 +164,8 @@ static void *nft_hash_deactivate(const struct nft_set *set, rcu_read_lock(); he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); if (he != NULL) { - if (!nft_set_elem_mark_busy(&he->ext)) + if (!nft_set_elem_mark_busy(&he->ext) || + !nft_is_active(net, &he->ext)) nft_set_elem_change_active(set, &he->ext); else he = NULL; -- cgit v1.2.3 From 3183ab8997a477c8d9ad175a1cef70dff77c6dbc Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 22 Jun 2016 13:26:10 +0200 Subject: netfilter: conntrack: allow increasing bucket size via sysctl too No need to restrict this to module parameter. We export a copy of the real hash size -- when user alters the value we allocate the new table, copy entries etc before we update the real size to the requested one. This is also needed because the real size is used by concurrent readers and cannot be changed without synchronizing the conntrack generation seqcnt. We only allow changing this value from the initial net namespace. Tested using http-client-benchmark vs. httpterm with concurrent while true;do echo $RANDOM > /proc/sys/net/netfilter/nf_conntrack_buckets done Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- Documentation/networking/nf_conntrack-sysctl.txt | 3 +- include/net/netfilter/nf_conntrack.h | 1 + net/netfilter/nf_conntrack_core.c | 41 ++++++++++++++++-------- net/netfilter/nf_conntrack_standalone.c | 36 ++++++++++++++++++--- 4 files changed, 62 insertions(+), 19 deletions(-) (limited to 'net/netfilter') diff --git a/Documentation/networking/nf_conntrack-sysctl.txt b/Documentation/networking/nf_conntrack-sysctl.txt index f55599c62c9d..4fb51d32fccc 100644 --- a/Documentation/networking/nf_conntrack-sysctl.txt +++ b/Documentation/networking/nf_conntrack-sysctl.txt @@ -7,12 +7,13 @@ nf_conntrack_acct - BOOLEAN Enable connection tracking flow accounting. 64-bit byte and packet counters per flow are added. -nf_conntrack_buckets - INTEGER (read-only) +nf_conntrack_buckets - INTEGER Size of hash table. If not specified as parameter during module loading, the default size is calculated by dividing total memory by 16384 to determine the number of buckets but the hash table will never have fewer than 32 and limited to 16384 buckets. For systems with more than 4GB of memory it will be 65536 buckets. + This sysctl is only writeable in the initial net namespace. nf_conntrack_checksum - BOOLEAN 0 - disabled diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 9c0ed3d7af89..5d3397f34583 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -290,6 +290,7 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb) struct kernel_param; int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp); +int nf_conntrack_hash_resize(unsigned int hashsize); extern unsigned int nf_conntrack_htable_size; extern unsigned int nf_conntrack_max; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index a459176c3253..e17d5c7faca0 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1595,24 +1595,14 @@ void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls) } EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable); -int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) +int nf_conntrack_hash_resize(unsigned int hashsize) { - int i, bucket, rc; - unsigned int hashsize, old_size; + int i, bucket; + unsigned int old_size; struct hlist_nulls_head *hash, *old_hash; struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; - if (current->nsproxy->net_ns != &init_net) - return -EOPNOTSUPP; - - /* On boot, we can set this without any fancy locking. */ - if (!nf_conntrack_htable_size) - return param_set_uint(val, kp); - - rc = kstrtouint(val, 0, &hashsize); - if (rc) - return rc; if (!hashsize) return -EINVAL; @@ -1620,6 +1610,12 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) if (!hash) return -ENOMEM; + old_size = nf_conntrack_htable_size; + if (old_size == hashsize) { + nf_ct_free_hashtable(hash, hashsize); + return 0; + } + local_bh_disable(); nf_conntrack_all_lock(); write_seqcount_begin(&nf_conntrack_generation); @@ -1655,6 +1651,25 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) nf_ct_free_hashtable(old_hash, old_size); return 0; } + +int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) +{ + unsigned int hashsize; + int rc; + + if (current->nsproxy->net_ns != &init_net) + return -EOPNOTSUPP; + + /* On boot, we can set this without any fancy locking. */ + if (!nf_conntrack_htable_size) + return param_set_uint(val, kp); + + rc = kstrtouint(val, 0, &hashsize); + if (rc) + return rc; + + return nf_conntrack_hash_resize(hashsize); +} EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize); module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index f87e84ebcec3..a0cc1919f081 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -434,8 +434,29 @@ static void nf_conntrack_standalone_fini_proc(struct net *net) #ifdef CONFIG_SYSCTL /* Log invalid packets of a given protocol */ -static int log_invalid_proto_min = 0; -static int log_invalid_proto_max = 255; +static int log_invalid_proto_min __read_mostly; +static int log_invalid_proto_max __read_mostly = 255; + +/* size the user *wants to set */ +static unsigned int nf_conntrack_htable_size_user __read_mostly; + +static int +nf_conntrack_hash_sysctl(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int ret; + + ret = proc_dointvec(table, write, buffer, lenp, ppos); + if (ret < 0 || !write) + return ret; + + /* update ret, we might not be able to satisfy request */ + ret = nf_conntrack_hash_resize(nf_conntrack_htable_size_user); + + /* update it to the actual value used by conntrack */ + nf_conntrack_htable_size_user = nf_conntrack_htable_size; + return ret; +} static struct ctl_table_header *nf_ct_netfilter_header; @@ -456,10 +477,10 @@ static struct ctl_table nf_ct_sysctl_table[] = { }, { .procname = "nf_conntrack_buckets", - .data = &nf_conntrack_htable_size, + .data = &nf_conntrack_htable_size_user, .maxlen = sizeof(unsigned int), - .mode = 0444, - .proc_handler = proc_dointvec, + .mode = 0644, + .proc_handler = nf_conntrack_hash_sysctl, }, { .procname = "nf_conntrack_checksum", @@ -517,6 +538,9 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) if (net->user_ns != &init_user_ns) table[0].procname = NULL; + if (!net_eq(&init_net, net)) + table[2].mode = 0444; + net->ct.sysctl_header = register_net_sysctl(net, "net/netfilter", table); if (!net->ct.sysctl_header) goto out_unregister_netfilter; @@ -606,6 +630,8 @@ static int __init nf_conntrack_standalone_init(void) ret = -ENOMEM; goto out_sysctl; } + + nf_conntrack_htable_size_user = nf_conntrack_htable_size; #endif ret = register_pernet_subsys(&nf_conntrack_net_ops); -- cgit v1.2.3 From 82bec71d46b83f39860e2838ff8394e4fcd6efab Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 22 Jun 2016 14:26:33 +0200 Subject: netfilter: nf_tables: get rid of NFT_BASECHAIN_DISABLED This flag was introduced to restore rulesets from the new netdev family, but since 5ebe0b0eec9d6f7 ("netfilter: nf_tables: destroy basechain and rules on netdevice removal") the ruleset is released once the netdev is gone. This also removes nft_register_basechain() and nft_unregister_basechain() since they have no clients anymore after this rework. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 1 - net/netfilter/nf_tables_api.c | 62 ++++++++++++++++----------------------- 2 files changed, 25 insertions(+), 38 deletions(-) (limited to 'net/netfilter') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 07a5ba47cbda..1ea19a6e72e6 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -798,7 +798,6 @@ struct nft_stats { }; #define NFT_HOOK_OPS_MAX 2 -#define NFT_BASECHAIN_DISABLED (1 << 0) /** * struct nft_base_chain - nf_tables base chain diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 3316bce0a878..92c9faeb2bf8 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -131,29 +131,8 @@ static void nft_trans_destroy(struct nft_trans *trans) kfree(trans); } -static int nft_register_basechain(struct nft_base_chain *basechain, - unsigned int hook_nops) -{ - struct net *net = read_pnet(&basechain->pnet); - - if (basechain->flags & NFT_BASECHAIN_DISABLED) - return 0; - - return nf_register_net_hooks(net, basechain->ops, hook_nops); -} - -static void nft_unregister_basechain(struct nft_base_chain *basechain, - unsigned int hook_nops) -{ - struct net *net = read_pnet(&basechain->pnet); - - if (basechain->flags & NFT_BASECHAIN_DISABLED) - return; - - nf_unregister_net_hooks(net, basechain->ops, hook_nops); -} - -static int nf_tables_register_hooks(const struct nft_table *table, +static int nf_tables_register_hooks(struct net *net, + const struct nft_table *table, struct nft_chain *chain, unsigned int hook_nops) { @@ -161,10 +140,12 @@ static int nf_tables_register_hooks(const struct nft_table *table, !(chain->flags & NFT_BASE_CHAIN)) return 0; - return nft_register_basechain(nft_base_chain(chain), hook_nops); + return nf_register_net_hooks(net, nft_base_chain(chain)->ops, + hook_nops); } -static void nf_tables_unregister_hooks(const struct nft_table *table, +static void nf_tables_unregister_hooks(struct net *net, + const struct nft_table *table, struct nft_chain *chain, unsigned int hook_nops) { @@ -172,7 +153,7 @@ static void nf_tables_unregister_hooks(const struct nft_table *table, !(chain->flags & NFT_BASE_CHAIN)) return; - nft_unregister_basechain(nft_base_chain(chain), hook_nops); + nf_unregister_net_hooks(net, nft_base_chain(chain)->ops, hook_nops); } static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type) @@ -569,7 +550,8 @@ static int nf_tables_table_enable(struct net *net, if (!(chain->flags & NFT_BASE_CHAIN)) continue; - err = nft_register_basechain(nft_base_chain(chain), afi->nops); + err = nf_register_net_hooks(net, nft_base_chain(chain)->ops, + afi->nops); if (err < 0) goto err; @@ -586,7 +568,8 @@ err: if (i-- <= 0) break; - nft_unregister_basechain(nft_base_chain(chain), afi->nops); + nf_unregister_net_hooks(net, nft_base_chain(chain)->ops, + afi->nops); } return err; } @@ -600,9 +583,11 @@ static void nf_tables_table_disable(struct net *net, list_for_each_entry(chain, &table->chains, list) { if (!nft_is_active_next(net, chain)) continue; - if (chain->flags & NFT_BASE_CHAIN) - nft_unregister_basechain(nft_base_chain(chain), - afi->nops); + if (!(chain->flags & NFT_BASE_CHAIN)) + continue; + + nf_unregister_net_hooks(net, nft_base_chain(chain)->ops, + afi->nops); } } @@ -1451,7 +1436,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, chain->table = table; nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN); - err = nf_tables_register_hooks(table, chain, afi->nops); + err = nf_tables_register_hooks(net, table, chain, afi->nops); if (err < 0) goto err1; @@ -1464,7 +1449,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, list_add_tail_rcu(&chain->list, &table->chains); return 0; err2: - nf_tables_unregister_hooks(table, chain, afi->nops); + nf_tables_unregister_hooks(net, table, chain, afi->nops); err1: nf_tables_chain_destroy(chain); return err; @@ -3995,7 +3980,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) case NFT_MSG_DELCHAIN: list_del_rcu(&trans->ctx.chain->list); nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN); - nf_tables_unregister_hooks(trans->ctx.table, + nf_tables_unregister_hooks(trans->ctx.net, + trans->ctx.table, trans->ctx.chain, trans->ctx.afi->nops); break; @@ -4120,7 +4106,8 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) } else { trans->ctx.table->use--; list_del_rcu(&trans->ctx.chain->list); - nf_tables_unregister_hooks(trans->ctx.table, + nf_tables_unregister_hooks(trans->ctx.net, + trans->ctx.table, trans->ctx.chain, trans->ctx.afi->nops); } @@ -4662,7 +4649,7 @@ int __nft_release_basechain(struct nft_ctx *ctx) BUG_ON(!(ctx->chain->flags & NFT_BASE_CHAIN)); - nf_tables_unregister_hooks(ctx->chain->table, ctx->chain, + nf_tables_unregister_hooks(ctx->net, ctx->chain->table, ctx->chain, ctx->afi->nops); list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) { list_del(&rule->list); @@ -4691,7 +4678,8 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi) list_for_each_entry_safe(table, nt, &afi->tables, list) { list_for_each_entry(chain, &table->chains, list) - nf_tables_unregister_hooks(table, chain, afi->nops); + nf_tables_unregister_hooks(net, table, chain, + afi->nops); /* No packets are walking on these chains anymore. */ ctx.table = table; list_for_each_entry(chain, &table->chains, list) { -- cgit v1.2.3 From 0071e184a535e40ce487528cb04f4690cb0da881 Mon Sep 17 00:00:00 2001 From: Arturo Borrero Date: Thu, 23 Jun 2016 12:24:08 +0200 Subject: netfilter: nf_tables: add support for inverted logic in nft_lookup Introduce a new configuration option for this expression, which allows users to invert the logic of set lookups. In _init() we will now return EINVAL if NFT_LOOKUP_F_INV is in anyway related to a map lookup. The code in the _eval() function has been untangled and updated to sopport the XOR of options, as we should consider 4 cases: * lookup false, invert false -> NFT_BREAK * lookup false, invert true -> return w/o NFT_BREAK * lookup true, invert false -> return w/o NFT_BREAK * lookup true, invert true -> NFT_BREAK Signed-off-by: Arturo Borrero Gonzalez Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 6 ++++++ net/netfilter/nft_lookup.c | 37 +++++++++++++++++++++++++++----- 2 files changed, 38 insertions(+), 5 deletions(-) (limited to 'net/netfilter') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 6a4dbe04f09e..01751faccaf8 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -546,6 +546,10 @@ enum nft_cmp_attributes { }; #define NFTA_CMP_MAX (__NFTA_CMP_MAX - 1) +enum nft_lookup_flags { + NFT_LOOKUP_F_INV = (1 << 0), +}; + /** * enum nft_lookup_attributes - nf_tables set lookup expression netlink attributes * @@ -553,6 +557,7 @@ enum nft_cmp_attributes { * @NFTA_LOOKUP_SREG: source register of the data to look for (NLA_U32: nft_registers) * @NFTA_LOOKUP_DREG: destination register (NLA_U32: nft_registers) * @NFTA_LOOKUP_SET_ID: uniquely identifies a set in a transaction (NLA_U32) + * @NFTA_LOOKUP_FLAGS: flags (NLA_U32: enum nft_lookup_flags) */ enum nft_lookup_attributes { NFTA_LOOKUP_UNSPEC, @@ -560,6 +565,7 @@ enum nft_lookup_attributes { NFTA_LOOKUP_SREG, NFTA_LOOKUP_DREG, NFTA_LOOKUP_SET_ID, + NFTA_LOOKUP_FLAGS, __NFTA_LOOKUP_MAX }; #define NFTA_LOOKUP_MAX (__NFTA_LOOKUP_MAX - 1) diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 8a102cf855d0..b8d18f598569 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -22,6 +22,7 @@ struct nft_lookup { struct nft_set *set; enum nft_registers sreg:8; enum nft_registers dreg:8; + bool invert; struct nft_set_binding binding; }; @@ -32,14 +33,20 @@ static void nft_lookup_eval(const struct nft_expr *expr, const struct nft_lookup *priv = nft_expr_priv(expr); const struct nft_set *set = priv->set; const struct nft_set_ext *ext; + bool found; - if (set->ops->lookup(set, ®s->data[priv->sreg], &ext)) { - if (set->flags & NFT_SET_MAP) - nft_data_copy(®s->data[priv->dreg], - nft_set_ext_data(ext), set->dlen); + found = set->ops->lookup(set, ®s->data[priv->sreg], &ext) ^ + priv->invert; + + if (!found) { + regs->verdict.code = NFT_BREAK; return; } - regs->verdict.code = NFT_BREAK; + + if (found && set->flags & NFT_SET_MAP) + nft_data_copy(®s->data[priv->dreg], + nft_set_ext_data(ext), set->dlen); + } static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = { @@ -47,6 +54,7 @@ static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = { [NFTA_LOOKUP_SET_ID] = { .type = NLA_U32 }, [NFTA_LOOKUP_SREG] = { .type = NLA_U32 }, [NFTA_LOOKUP_DREG] = { .type = NLA_U32 }, + [NFTA_LOOKUP_FLAGS] = { .type = NLA_U32 }, }; static int nft_lookup_init(const struct nft_ctx *ctx, @@ -56,6 +64,7 @@ static int nft_lookup_init(const struct nft_ctx *ctx, struct nft_lookup *priv = nft_expr_priv(expr); u8 genmask = nft_genmask_next(ctx->net); struct nft_set *set; + u32 flags; int err; if (tb[NFTA_LOOKUP_SET] == NULL || @@ -81,7 +90,22 @@ static int nft_lookup_init(const struct nft_ctx *ctx, if (err < 0) return err; + if (tb[NFTA_LOOKUP_FLAGS]) { + flags = ntohl(nla_get_be32(tb[NFTA_LOOKUP_FLAGS])); + + if (flags & ~NFT_LOOKUP_F_INV) + return -EINVAL; + + if (flags & NFT_LOOKUP_F_INV) { + if (set->flags & NFT_SET_MAP) + return -EINVAL; + priv->invert = true; + } + } + if (tb[NFTA_LOOKUP_DREG] != NULL) { + if (priv->invert) + return -EINVAL; if (!(set->flags & NFT_SET_MAP)) return -EINVAL; @@ -114,6 +138,7 @@ static void nft_lookup_destroy(const struct nft_ctx *ctx, static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_lookup *priv = nft_expr_priv(expr); + u32 flags = priv->invert ? NFT_LOOKUP_F_INV : 0; if (nla_put_string(skb, NFTA_LOOKUP_SET, priv->set->name)) goto nla_put_failure; @@ -122,6 +147,8 @@ static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr) if (priv->set->flags & NFT_SET_MAP) if (nft_dump_register(skb, NFTA_LOOKUP_DREG, priv->dreg)) goto nla_put_failure; + if (nla_put_be32(skb, NFTA_LOOKUP_FLAGS, htonl(flags))) + goto nla_put_failure; return 0; nla_put_failure: -- cgit v1.2.3 From f1504307b9ab60e73ba31eece4be8298ebc9c1b7 Mon Sep 17 00:00:00 2001 From: Moritz Sichert Date: Thu, 30 Jun 2016 11:46:28 +0200 Subject: netfilter: Remove references to obsolete CONFIG_IP_ROUTE_FWMARK This option was removed in commit 47dcf0cb1005 ("[NET]: Rethink mark field in struct flowi"). Signed-off-by: Moritz Sichert Signed-off-by: Pablo Neira Ayuso --- net/netfilter/Kconfig | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 95e757c377f9..9266ceebd112 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -609,9 +609,8 @@ config NETFILTER_XT_MARK The target allows you to create rules in the "mangle" table which alter the netfilter mark (nfmark) field associated with the packet. - Prior to routing, the nfmark can influence the routing method (see - "Use netfilter MARK value as routing key") and can also be used by - other subsystems to change their behavior. + Prior to routing, the nfmark can influence the routing method and can + also be used by other subsystems to change their behavior. config NETFILTER_XT_CONNMARK tristate 'ctmark target and match support' @@ -753,9 +752,8 @@ config NETFILTER_XT_TARGET_HMARK The target allows you to create rules in the "raw" and "mangle" tables which set the skbuff mark by means of hash calculation within a given - range. The nfmark can influence the routing method (see "Use netfilter - MARK value as routing key") and can also be used by other subsystems to - change their behaviour. + range. The nfmark can influence the routing method and can also be used + by other subsystems to change their behaviour. To compile it as a module, choose M here. If unsure, say N. -- cgit v1.2.3 From c37a2dfa67f7920b14ea77dc9f9f9660f7a1f6dd Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 24 Jun 2016 13:25:22 -0700 Subject: netfilter: Convert FWINV<[foo]> macros and uses to NF_INVF netfilter uses multiple FWINV #defines with identical form that hide a specific structure variable and dereference it with a invflags member. $ git grep "#define FWINV" include/linux/netfilter_bridge/ebtables.h:#define FWINV(bool,invflg) ((bool) ^ !!(info->invflags & invflg)) net/bridge/netfilter/ebtables.c:#define FWINV2(bool, invflg) ((bool) ^ !!(e->invflags & invflg)) net/ipv4/netfilter/arp_tables.c:#define FWINV(bool, invflg) ((bool) ^ !!(arpinfo->invflags & (invflg))) net/ipv4/netfilter/ip_tables.c:#define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg))) net/ipv6/netfilter/ip6_tables.c:#define FWINV(bool, invflg) ((bool) ^ !!(ip6info->invflags & (invflg))) net/netfilter/xt_tcpudp.c:#define FWINVTCP(bool, invflg) ((bool) ^ !!(tcpinfo->invflags & (invflg))) Consolidate these macros into a single NF_INVF macro. Miscellanea: o Neaten the alignment around these uses o A few lines are > 80 columns for intelligibility Signed-off-by: Joe Perches Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 4 +++ include/linux/netfilter_bridge/ebtables.h | 2 -- net/bridge/netfilter/ebt_802_3.c | 6 ++-- net/bridge/netfilter/ebt_arp.c | 38 +++++++++++----------- net/bridge/netfilter/ebt_ip.c | 28 ++++++++--------- net/bridge/netfilter/ebt_ip6.c | 41 +++++++++++++----------- net/bridge/netfilter/ebt_stp.c | 52 ++++++++++++++++--------------- net/bridge/netfilter/ebtables.c | 27 ++++++++-------- net/ipv4/netfilter/arp_tables.c | 41 ++++++++++++------------ net/ipv4/netfilter/ip_tables.c | 20 ++++++------ net/ipv6/netfilter/ip6_tables.c | 16 +++++----- net/netfilter/xt_tcpudp.c | 7 ++--- 12 files changed, 144 insertions(+), 138 deletions(-) (limited to 'net/netfilter') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index dc4f58a3cdcc..e94e81ab2b58 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -6,6 +6,10 @@ #include #include +/* Test a struct->invflags and a boolean for inequality */ +#define NF_INVF(ptr, flag, boolean) \ + ((boolean) ^ !!((ptr)->invflags & (flag))) + /** * struct xt_action_param - parameters for matches/targets * diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 2ea517c7c6b9..984b2112c77b 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -115,8 +115,6 @@ extern unsigned int ebt_do_table(struct sk_buff *skb, const struct nf_hook_state *state, struct ebt_table *table); -/* Used in the kernel match() functions */ -#define FWINV(bool,invflg) ((bool) ^ !!(info->invflags & invflg)) /* True if the hook mask denotes that the rule is in a base chain, * used in the check() functions */ #define BASE_CHAIN (par->hook_mask & (1 << NF_BR_NUMHOOKS)) diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c index 2a449b7ab8fa..5fc4affd9fdb 100644 --- a/net/bridge/netfilter/ebt_802_3.c +++ b/net/bridge/netfilter/ebt_802_3.c @@ -20,16 +20,16 @@ ebt_802_3_mt(const struct sk_buff *skb, struct xt_action_param *par) __be16 type = hdr->llc.ui.ctrl & IS_UI ? hdr->llc.ui.type : hdr->llc.ni.type; if (info->bitmask & EBT_802_3_SAP) { - if (FWINV(info->sap != hdr->llc.ui.ssap, EBT_802_3_SAP)) + if (NF_INVF(info, EBT_802_3_SAP, info->sap != hdr->llc.ui.ssap)) return false; - if (FWINV(info->sap != hdr->llc.ui.dsap, EBT_802_3_SAP)) + if (NF_INVF(info, EBT_802_3_SAP, info->sap != hdr->llc.ui.dsap)) return false; } if (info->bitmask & EBT_802_3_TYPE) { if (!(hdr->llc.ui.dsap == CHECK_TYPE && hdr->llc.ui.ssap == CHECK_TYPE)) return false; - if (FWINV(info->type != type, EBT_802_3_TYPE)) + if (NF_INVF(info, EBT_802_3_TYPE, info->type != type)) return false; } diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c index cca0a899ee15..227142282b45 100644 --- a/net/bridge/netfilter/ebt_arp.c +++ b/net/bridge/netfilter/ebt_arp.c @@ -25,14 +25,14 @@ ebt_arp_mt(const struct sk_buff *skb, struct xt_action_param *par) ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); if (ah == NULL) return false; - if (info->bitmask & EBT_ARP_OPCODE && FWINV(info->opcode != - ah->ar_op, EBT_ARP_OPCODE)) + if ((info->bitmask & EBT_ARP_OPCODE) && + NF_INVF(info, EBT_ARP_OPCODE, info->opcode != ah->ar_op)) return false; - if (info->bitmask & EBT_ARP_HTYPE && FWINV(info->htype != - ah->ar_hrd, EBT_ARP_HTYPE)) + if ((info->bitmask & EBT_ARP_HTYPE) && + NF_INVF(info, EBT_ARP_HTYPE, info->htype != ah->ar_hrd)) return false; - if (info->bitmask & EBT_ARP_PTYPE && FWINV(info->ptype != - ah->ar_pro, EBT_ARP_PTYPE)) + if ((info->bitmask & EBT_ARP_PTYPE) && + NF_INVF(info, EBT_ARP_PTYPE, info->ptype != ah->ar_pro)) return false; if (info->bitmask & (EBT_ARP_SRC_IP | EBT_ARP_DST_IP | EBT_ARP_GRAT)) { @@ -51,14 +51,16 @@ ebt_arp_mt(const struct sk_buff *skb, struct xt_action_param *par) sizeof(daddr), &daddr); if (dap == NULL) return false; - if (info->bitmask & EBT_ARP_SRC_IP && - FWINV(info->saddr != (*sap & info->smsk), EBT_ARP_SRC_IP)) + if ((info->bitmask & EBT_ARP_SRC_IP) && + NF_INVF(info, EBT_ARP_SRC_IP, + info->saddr != (*sap & info->smsk))) return false; - if (info->bitmask & EBT_ARP_DST_IP && - FWINV(info->daddr != (*dap & info->dmsk), EBT_ARP_DST_IP)) + if ((info->bitmask & EBT_ARP_DST_IP) && + NF_INVF(info, EBT_ARP_DST_IP, + info->daddr != (*dap & info->dmsk))) return false; - if (info->bitmask & EBT_ARP_GRAT && - FWINV(*dap != *sap, EBT_ARP_GRAT)) + if ((info->bitmask & EBT_ARP_GRAT) && + NF_INVF(info, EBT_ARP_GRAT, *dap != *sap)) return false; } @@ -73,9 +75,9 @@ ebt_arp_mt(const struct sk_buff *skb, struct xt_action_param *par) sizeof(_mac), &_mac); if (mp == NULL) return false; - if (FWINV(!ether_addr_equal_masked(mp, info->smaddr, - info->smmsk), - EBT_ARP_SRC_MAC)) + if (NF_INVF(info, EBT_ARP_SRC_MAC, + !ether_addr_equal_masked(mp, info->smaddr, + info->smmsk))) return false; } @@ -85,9 +87,9 @@ ebt_arp_mt(const struct sk_buff *skb, struct xt_action_param *par) sizeof(_mac), &_mac); if (mp == NULL) return false; - if (FWINV(!ether_addr_equal_masked(mp, info->dmaddr, - info->dmmsk), - EBT_ARP_DST_MAC)) + if (NF_INVF(info, EBT_ARP_DST_MAC, + !ether_addr_equal_masked(mp, info->dmaddr, + info->dmmsk))) return false; } } diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c index 23bca62d58d2..d06968bdf5ec 100644 --- a/net/bridge/netfilter/ebt_ip.c +++ b/net/bridge/netfilter/ebt_ip.c @@ -36,19 +36,19 @@ ebt_ip_mt(const struct sk_buff *skb, struct xt_action_param *par) ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); if (ih == NULL) return false; - if (info->bitmask & EBT_IP_TOS && - FWINV(info->tos != ih->tos, EBT_IP_TOS)) + if ((info->bitmask & EBT_IP_TOS) && + NF_INVF(info, EBT_IP_TOS, info->tos != ih->tos)) return false; - if (info->bitmask & EBT_IP_SOURCE && - FWINV((ih->saddr & info->smsk) != - info->saddr, EBT_IP_SOURCE)) + if ((info->bitmask & EBT_IP_SOURCE) && + NF_INVF(info, EBT_IP_SOURCE, + (ih->saddr & info->smsk) != info->saddr)) return false; if ((info->bitmask & EBT_IP_DEST) && - FWINV((ih->daddr & info->dmsk) != - info->daddr, EBT_IP_DEST)) + NF_INVF(info, EBT_IP_DEST, + (ih->daddr & info->dmsk) != info->daddr)) return false; if (info->bitmask & EBT_IP_PROTO) { - if (FWINV(info->protocol != ih->protocol, EBT_IP_PROTO)) + if (NF_INVF(info, EBT_IP_PROTO, info->protocol != ih->protocol)) return false; if (!(info->bitmask & EBT_IP_DPORT) && !(info->bitmask & EBT_IP_SPORT)) @@ -61,16 +61,16 @@ ebt_ip_mt(const struct sk_buff *skb, struct xt_action_param *par) return false; if (info->bitmask & EBT_IP_DPORT) { u32 dst = ntohs(pptr->dst); - if (FWINV(dst < info->dport[0] || - dst > info->dport[1], - EBT_IP_DPORT)) + if (NF_INVF(info, EBT_IP_DPORT, + dst < info->dport[0] || + dst > info->dport[1])) return false; } if (info->bitmask & EBT_IP_SPORT) { u32 src = ntohs(pptr->src); - if (FWINV(src < info->sport[0] || - src > info->sport[1], - EBT_IP_SPORT)) + if (NF_INVF(info, EBT_IP_SPORT, + src < info->sport[0] || + src > info->sport[1])) return false; } } diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c index 98de6e7fd86d..4617491be41e 100644 --- a/net/bridge/netfilter/ebt_ip6.c +++ b/net/bridge/netfilter/ebt_ip6.c @@ -45,15 +45,18 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par) ih6 = skb_header_pointer(skb, 0, sizeof(_ip6h), &_ip6h); if (ih6 == NULL) return false; - if (info->bitmask & EBT_IP6_TCLASS && - FWINV(info->tclass != ipv6_get_dsfield(ih6), EBT_IP6_TCLASS)) + if ((info->bitmask & EBT_IP6_TCLASS) && + NF_INVF(info, EBT_IP6_TCLASS, + info->tclass != ipv6_get_dsfield(ih6))) return false; - if ((info->bitmask & EBT_IP6_SOURCE && - FWINV(ipv6_masked_addr_cmp(&ih6->saddr, &info->smsk, - &info->saddr), EBT_IP6_SOURCE)) || - (info->bitmask & EBT_IP6_DEST && - FWINV(ipv6_masked_addr_cmp(&ih6->daddr, &info->dmsk, - &info->daddr), EBT_IP6_DEST))) + if (((info->bitmask & EBT_IP6_SOURCE) && + NF_INVF(info, EBT_IP6_SOURCE, + ipv6_masked_addr_cmp(&ih6->saddr, &info->smsk, + &info->saddr))) || + ((info->bitmask & EBT_IP6_DEST) && + NF_INVF(info, EBT_IP6_DEST, + ipv6_masked_addr_cmp(&ih6->daddr, &info->dmsk, + &info->daddr)))) return false; if (info->bitmask & EBT_IP6_PROTO) { uint8_t nexthdr = ih6->nexthdr; @@ -63,7 +66,7 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par) offset_ph = ipv6_skip_exthdr(skb, sizeof(_ip6h), &nexthdr, &frag_off); if (offset_ph == -1) return false; - if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO)) + if (NF_INVF(info, EBT_IP6_PROTO, info->protocol != nexthdr)) return false; if (!(info->bitmask & (EBT_IP6_DPORT | EBT_IP6_SPORT | EBT_IP6_ICMP6))) @@ -76,22 +79,24 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par) return false; if (info->bitmask & EBT_IP6_DPORT) { u16 dst = ntohs(pptr->tcpudphdr.dst); - if (FWINV(dst < info->dport[0] || - dst > info->dport[1], EBT_IP6_DPORT)) + if (NF_INVF(info, EBT_IP6_DPORT, + dst < info->dport[0] || + dst > info->dport[1])) return false; } if (info->bitmask & EBT_IP6_SPORT) { u16 src = ntohs(pptr->tcpudphdr.src); - if (FWINV(src < info->sport[0] || - src > info->sport[1], EBT_IP6_SPORT)) + if (NF_INVF(info, EBT_IP6_SPORT, + src < info->sport[0] || + src > info->sport[1])) return false; } if ((info->bitmask & EBT_IP6_ICMP6) && - FWINV(pptr->icmphdr.type < info->icmpv6_type[0] || - pptr->icmphdr.type > info->icmpv6_type[1] || - pptr->icmphdr.code < info->icmpv6_code[0] || - pptr->icmphdr.code > info->icmpv6_code[1], - EBT_IP6_ICMP6)) + NF_INVF(info, EBT_IP6_ICMP6, + pptr->icmphdr.type < info->icmpv6_type[0] || + pptr->icmphdr.type > info->icmpv6_type[1] || + pptr->icmphdr.code < info->icmpv6_code[0] || + pptr->icmphdr.code > info->icmpv6_code[1])) return false; } return true; diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c index 45f73d55422f..3140eb912d7e 100644 --- a/net/bridge/netfilter/ebt_stp.c +++ b/net/bridge/netfilter/ebt_stp.c @@ -49,66 +49,68 @@ static bool ebt_filter_config(const struct ebt_stp_info *info, c = &info->config; if ((info->bitmask & EBT_STP_FLAGS) && - FWINV(c->flags != stpc->flags, EBT_STP_FLAGS)) + NF_INVF(info, EBT_STP_FLAGS, c->flags != stpc->flags)) return false; if (info->bitmask & EBT_STP_ROOTPRIO) { v16 = NR16(stpc->root); - if (FWINV(v16 < c->root_priol || v16 > c->root_priou, - EBT_STP_ROOTPRIO)) + if (NF_INVF(info, EBT_STP_ROOTPRIO, + v16 < c->root_priol || v16 > c->root_priou)) return false; } if (info->bitmask & EBT_STP_ROOTADDR) { - if (FWINV(!ether_addr_equal_masked(&stpc->root[2], c->root_addr, - c->root_addrmsk), - EBT_STP_ROOTADDR)) + if (NF_INVF(info, EBT_STP_ROOTADDR, + !ether_addr_equal_masked(&stpc->root[2], + c->root_addr, + c->root_addrmsk))) return false; } if (info->bitmask & EBT_STP_ROOTCOST) { v32 = NR32(stpc->root_cost); - if (FWINV(v32 < c->root_costl || v32 > c->root_costu, - EBT_STP_ROOTCOST)) + if (NF_INVF(info, EBT_STP_ROOTCOST, + v32 < c->root_costl || v32 > c->root_costu)) return false; } if (info->bitmask & EBT_STP_SENDERPRIO) { v16 = NR16(stpc->sender); - if (FWINV(v16 < c->sender_priol || v16 > c->sender_priou, - EBT_STP_SENDERPRIO)) + if (NF_INVF(info, EBT_STP_SENDERPRIO, + v16 < c->sender_priol || v16 > c->sender_priou)) return false; } if (info->bitmask & EBT_STP_SENDERADDR) { - if (FWINV(!ether_addr_equal_masked(&stpc->sender[2], - c->sender_addr, - c->sender_addrmsk), - EBT_STP_SENDERADDR)) + if (NF_INVF(info, EBT_STP_SENDERADDR, + !ether_addr_equal_masked(&stpc->sender[2], + c->sender_addr, + c->sender_addrmsk))) return false; } if (info->bitmask & EBT_STP_PORT) { v16 = NR16(stpc->port); - if (FWINV(v16 < c->portl || v16 > c->portu, EBT_STP_PORT)) + if (NF_INVF(info, EBT_STP_PORT, + v16 < c->portl || v16 > c->portu)) return false; } if (info->bitmask & EBT_STP_MSGAGE) { v16 = NR16(stpc->msg_age); - if (FWINV(v16 < c->msg_agel || v16 > c->msg_ageu, - EBT_STP_MSGAGE)) + if (NF_INVF(info, EBT_STP_MSGAGE, + v16 < c->msg_agel || v16 > c->msg_ageu)) return false; } if (info->bitmask & EBT_STP_MAXAGE) { v16 = NR16(stpc->max_age); - if (FWINV(v16 < c->max_agel || v16 > c->max_ageu, - EBT_STP_MAXAGE)) + if (NF_INVF(info, EBT_STP_MAXAGE, + v16 < c->max_agel || v16 > c->max_ageu)) return false; } if (info->bitmask & EBT_STP_HELLOTIME) { v16 = NR16(stpc->hello_time); - if (FWINV(v16 < c->hello_timel || v16 > c->hello_timeu, - EBT_STP_HELLOTIME)) + if (NF_INVF(info, EBT_STP_HELLOTIME, + v16 < c->hello_timel || v16 > c->hello_timeu)) return false; } if (info->bitmask & EBT_STP_FWDD) { v16 = NR16(stpc->forward_delay); - if (FWINV(v16 < c->forward_delayl || v16 > c->forward_delayu, - EBT_STP_FWDD)) + if (NF_INVF(info, EBT_STP_FWDD, + v16 < c->forward_delayl || v16 > c->forward_delayu)) return false; } return true; @@ -130,8 +132,8 @@ ebt_stp_mt(const struct sk_buff *skb, struct xt_action_param *par) if (memcmp(sp, header, sizeof(header))) return false; - if (info->bitmask & EBT_STP_TYPE && - FWINV(info->type != sp->type, EBT_STP_TYPE)) + if ((info->bitmask & EBT_STP_TYPE) && + NF_INVF(info, EBT_STP_TYPE, info->type != sp->type)) return false; if (sp->type == BPDU_TYPE_CONFIG && diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 5721a25be860..cceac5bb658f 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -121,7 +121,6 @@ ebt_dev_check(const char *entry, const struct net_device *device) return devname[i] != entry[i] && entry[i] != 1; } -#define FWINV2(bool, invflg) ((bool) ^ !!(e->invflags & invflg)) /* process standard matches */ static inline int ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, @@ -137,34 +136,36 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, ethproto = h->h_proto; if (e->bitmask & EBT_802_3) { - if (FWINV2(eth_proto_is_802_3(ethproto), EBT_IPROTO)) + if (NF_INVF(e, EBT_IPROTO, eth_proto_is_802_3(ethproto))) return 1; } else if (!(e->bitmask & EBT_NOPROTO) && - FWINV2(e->ethproto != ethproto, EBT_IPROTO)) + NF_INVF(e, EBT_IPROTO, e->ethproto != ethproto)) return 1; - if (FWINV2(ebt_dev_check(e->in, in), EBT_IIN)) + if (NF_INVF(e, EBT_IIN, ebt_dev_check(e->in, in))) return 1; - if (FWINV2(ebt_dev_check(e->out, out), EBT_IOUT)) + if (NF_INVF(e, EBT_IOUT, ebt_dev_check(e->out, out))) return 1; /* rcu_read_lock()ed by nf_hook_slow */ if (in && (p = br_port_get_rcu(in)) != NULL && - FWINV2(ebt_dev_check(e->logical_in, p->br->dev), EBT_ILOGICALIN)) + NF_INVF(e, EBT_ILOGICALIN, + ebt_dev_check(e->logical_in, p->br->dev))) return 1; if (out && (p = br_port_get_rcu(out)) != NULL && - FWINV2(ebt_dev_check(e->logical_out, p->br->dev), EBT_ILOGICALOUT)) + NF_INVF(e, EBT_ILOGICALOUT, + ebt_dev_check(e->logical_out, p->br->dev))) return 1; if (e->bitmask & EBT_SOURCEMAC) { - if (FWINV2(!ether_addr_equal_masked(h->h_source, - e->sourcemac, e->sourcemsk), - EBT_ISOURCE)) + if (NF_INVF(e, EBT_ISOURCE, + !ether_addr_equal_masked(h->h_source, e->sourcemac, + e->sourcemsk))) return 1; } if (e->bitmask & EBT_DESTMAC) { - if (FWINV2(!ether_addr_equal_masked(h->h_dest, - e->destmac, e->destmsk), - EBT_IDEST)) + if (NF_INVF(e, EBT_IDEST, + !ether_addr_equal_masked(h->h_dest, e->destmac, + e->destmsk))) return 1; } return 0; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 2033f929aa66..c8dd9e26b185 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -89,22 +89,20 @@ static inline int arp_packet_match(const struct arphdr *arphdr, __be32 src_ipaddr, tgt_ipaddr; long ret; -#define FWINV(bool, invflg) ((bool) ^ !!(arpinfo->invflags & (invflg))) - - if (FWINV((arphdr->ar_op & arpinfo->arpop_mask) != arpinfo->arpop, - ARPT_INV_ARPOP)) + if (NF_INVF(arpinfo, ARPT_INV_ARPOP, + (arphdr->ar_op & arpinfo->arpop_mask) != arpinfo->arpop)) return 0; - if (FWINV((arphdr->ar_hrd & arpinfo->arhrd_mask) != arpinfo->arhrd, - ARPT_INV_ARPHRD)) + if (NF_INVF(arpinfo, ARPT_INV_ARPHRD, + (arphdr->ar_hrd & arpinfo->arhrd_mask) != arpinfo->arhrd)) return 0; - if (FWINV((arphdr->ar_pro & arpinfo->arpro_mask) != arpinfo->arpro, - ARPT_INV_ARPPRO)) + if (NF_INVF(arpinfo, ARPT_INV_ARPPRO, + (arphdr->ar_pro & arpinfo->arpro_mask) != arpinfo->arpro)) return 0; - if (FWINV((arphdr->ar_hln & arpinfo->arhln_mask) != arpinfo->arhln, - ARPT_INV_ARPHLN)) + if (NF_INVF(arpinfo, ARPT_INV_ARPHLN, + (arphdr->ar_hln & arpinfo->arhln_mask) != arpinfo->arhln)) return 0; src_devaddr = arpptr; @@ -115,31 +113,32 @@ static inline int arp_packet_match(const struct arphdr *arphdr, arpptr += dev->addr_len; memcpy(&tgt_ipaddr, arpptr, sizeof(u32)); - if (FWINV(arp_devaddr_compare(&arpinfo->src_devaddr, src_devaddr, dev->addr_len), - ARPT_INV_SRCDEVADDR) || - FWINV(arp_devaddr_compare(&arpinfo->tgt_devaddr, tgt_devaddr, dev->addr_len), - ARPT_INV_TGTDEVADDR)) + if (NF_INVF(arpinfo, ARPT_INV_SRCDEVADDR, + arp_devaddr_compare(&arpinfo->src_devaddr, src_devaddr, + dev->addr_len)) || + NF_INVF(arpinfo, ARPT_INV_TGTDEVADDR, + arp_devaddr_compare(&arpinfo->tgt_devaddr, tgt_devaddr, + dev->addr_len))) return 0; - if (FWINV((src_ipaddr & arpinfo->smsk.s_addr) != arpinfo->src.s_addr, - ARPT_INV_SRCIP) || - FWINV(((tgt_ipaddr & arpinfo->tmsk.s_addr) != arpinfo->tgt.s_addr), - ARPT_INV_TGTIP)) + if (NF_INVF(arpinfo, ARPT_INV_SRCIP, + (src_ipaddr & arpinfo->smsk.s_addr) != arpinfo->src.s_addr) || + NF_INVF(arpinfo, ARPT_INV_TGTIP, + (tgt_ipaddr & arpinfo->tmsk.s_addr) != arpinfo->tgt.s_addr)) return 0; /* Look for ifname matches. */ ret = ifname_compare(indev, arpinfo->iniface, arpinfo->iniface_mask); - if (FWINV(ret != 0, ARPT_INV_VIA_IN)) + if (NF_INVF(arpinfo, ARPT_INV_VIA_IN, ret != 0)) return 0; ret = ifname_compare(outdev, arpinfo->outiface, arpinfo->outiface_mask); - if (FWINV(ret != 0, ARPT_INV_VIA_OUT)) + if (NF_INVF(arpinfo, ARPT_INV_VIA_OUT, ret != 0)) return 0; return 1; -#undef FWINV } static inline int arp_checkentry(const struct arpt_arp *arp) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 54906e0e8e0c..f0df66f54ce6 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -58,32 +58,31 @@ ip_packet_match(const struct iphdr *ip, { unsigned long ret; -#define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg))) - - if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr, - IPT_INV_SRCIP) || - FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr, - IPT_INV_DSTIP)) + if (NF_INVF(ipinfo, IPT_INV_SRCIP, + (ip->saddr & ipinfo->smsk.s_addr) != ipinfo->src.s_addr) || + NF_INVF(ipinfo, IPT_INV_DSTIP, + (ip->daddr & ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr)) return false; ret = ifname_compare_aligned(indev, ipinfo->iniface, ipinfo->iniface_mask); - if (FWINV(ret != 0, IPT_INV_VIA_IN)) + if (NF_INVF(ipinfo, IPT_INV_VIA_IN, ret != 0)) return false; ret = ifname_compare_aligned(outdev, ipinfo->outiface, ipinfo->outiface_mask); - if (FWINV(ret != 0, IPT_INV_VIA_OUT)) + if (NF_INVF(ipinfo, IPT_INV_VIA_OUT, ret != 0)) return false; /* Check specific protocol */ if (ipinfo->proto && - FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) + NF_INVF(ipinfo, IPT_INV_PROTO, ip->protocol != ipinfo->proto)) return false; /* If we have a fragment rule but the packet is not a fragment * then we return zero */ - if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) + if (NF_INVF(ipinfo, IPT_INV_FRAG, + (ipinfo->flags & IPT_F_FRAG) && !isfrag)) return false; return true; @@ -122,7 +121,6 @@ static inline bool unconditional(const struct ipt_entry *e) return e->target_offset == sizeof(struct ipt_entry) && memcmp(&e->ip, &uncond, sizeof(uncond)) == 0; -#undef FWINV } /* for const-correctness */ diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 63e06c3dd319..61ed95054efa 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -73,22 +73,22 @@ ip6_packet_match(const struct sk_buff *skb, unsigned long ret; const struct ipv6hdr *ipv6 = ipv6_hdr(skb); -#define FWINV(bool, invflg) ((bool) ^ !!(ip6info->invflags & (invflg))) - - if (FWINV(ipv6_masked_addr_cmp(&ipv6->saddr, &ip6info->smsk, - &ip6info->src), IP6T_INV_SRCIP) || - FWINV(ipv6_masked_addr_cmp(&ipv6->daddr, &ip6info->dmsk, - &ip6info->dst), IP6T_INV_DSTIP)) + if (NF_INVF(ip6info, IP6T_INV_SRCIP, + ipv6_masked_addr_cmp(&ipv6->saddr, &ip6info->smsk, + &ip6info->src)) || + NF_INVF(ip6info, IP6T_INV_DSTIP, + ipv6_masked_addr_cmp(&ipv6->daddr, &ip6info->dmsk, + &ip6info->dst))) return false; ret = ifname_compare_aligned(indev, ip6info->iniface, ip6info->iniface_mask); - if (FWINV(ret != 0, IP6T_INV_VIA_IN)) + if (NF_INVF(ip6info, IP6T_INV_VIA_IN, ret != 0)) return false; ret = ifname_compare_aligned(outdev, ip6info->outiface, ip6info->outiface_mask); - if (FWINV(ret != 0, IP6T_INV_VIA_OUT)) + if (NF_INVF(ip6info, IP6T_INV_VIA_OUT, ret != 0)) return false; /* ... might want to do something with class and flowlabel here ... */ diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c index c14d4645daa3..ade024c90f4f 100644 --- a/net/netfilter/xt_tcpudp.c +++ b/net/netfilter/xt_tcpudp.c @@ -83,8 +83,6 @@ static bool tcp_mt(const struct sk_buff *skb, struct xt_action_param *par) return false; } -#define FWINVTCP(bool, invflg) ((bool) ^ !!(tcpinfo->invflags & (invflg))) - th = skb_header_pointer(skb, par->thoff, sizeof(_tcph), &_tcph); if (th == NULL) { /* We've been asked to examine this packet, and we @@ -102,9 +100,8 @@ static bool tcp_mt(const struct sk_buff *skb, struct xt_action_param *par) ntohs(th->dest), !!(tcpinfo->invflags & XT_TCP_INV_DSTPT))) return false; - if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask) - == tcpinfo->flg_cmp, - XT_TCP_INV_FLAGS)) + if (!NF_INVF(tcpinfo, XT_TCP_INV_FLAGS, + (((unsigned char *)th)[13] & tcpinfo->flg_mask) == tcpinfo->flg_cmp)) return false; if (tcpinfo->option) { if (th->doff * 4 < sizeof(_tcph)) { -- cgit v1.2.3 From 8b10cab64c134ffbffac96edd1899d303d3afcac Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sat, 2 Jul 2016 06:43:14 -0400 Subject: net: simplify and make pkt_type_ok() available for other users Suggested-by: Daniel Borkmann Signed-off-by: Jamal Hadi Salim Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/skbuff.h | 10 ++++++++++ net/netfilter/nft_meta.c | 9 +-------- 2 files changed, 11 insertions(+), 8 deletions(-) (limited to 'net/netfilter') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index dc0fca747c5e..638b0e004310 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -37,6 +37,7 @@ #include #include #include +#include #include /* The interface for checksum offload between the stack and networking drivers @@ -881,6 +882,15 @@ static inline struct rtable *skb_rtable(const struct sk_buff *skb) return (struct rtable *)skb_dst(skb); } +/* For mangling skb->pkt_type from user space side from applications + * such as nft, tc, etc, we only allow a conservative subset of + * possible pkt_types to be set. +*/ +static inline bool skb_pkt_type_ok(u32 ptype) +{ + return ptype <= PACKET_OTHERHOST; +} + void kfree_skb(struct sk_buff *skb); void kfree_skb_list(struct sk_buff *segs); void skb_tx_error(struct sk_buff *skb); diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 16c50b0dd426..03e5e33b5c39 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -199,13 +199,6 @@ err: } EXPORT_SYMBOL_GPL(nft_meta_get_eval); -/* don't change or set _LOOPBACK, _USER, etc. */ -static bool pkt_type_ok(u32 p) -{ - return p == PACKET_HOST || p == PACKET_BROADCAST || - p == PACKET_MULTICAST || p == PACKET_OTHERHOST; -} - void nft_meta_set_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -223,7 +216,7 @@ void nft_meta_set_eval(const struct nft_expr *expr, break; case NFT_META_PKTTYPE: if (skb->pkt_type != value && - pkt_type_ok(value) && pkt_type_ok(skb->pkt_type)) + skb_pkt_type_ok(value) && skb_pkt_type_ok(skb->pkt_type)) skb->pkt_type = value; break; case NFT_META_NFTRACE: -- cgit v1.2.3 From c6ac37d8d8843fb1fdc34e4a2a41a4f027ab670c Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Fri, 1 Jul 2016 16:53:54 +0300 Subject: netfilter: nf_log: fix error on write NONE to logger choice sysctl It is hard to unbind nf-logger: echo NONE > /proc/sys/net/netfilter/nf_log/0 bash: echo: write error: No such file or directory sysctl -w net.netfilter.nf_log.0=NONE sysctl: setting key "net.netfilter.nf_log.0": No such file or directory net.netfilter.nf_log.0 = NONE You need explicitly send '\0', for instance like: echo -e "NONE\0" > /proc/sys/net/netfilter/nf_log/0 That seem to be strange, so fix it using proc_dostring. Now it works fine: modprobe nfnetlink_log echo nfnetlink_log > /proc/sys/net/netfilter/nf_log/0 cat /proc/sys/net/netfilter/nf_log/0 nfnetlink_log echo NONE > /proc/sys/net/netfilter/nf_log/0 cat /proc/sys/net/netfilter/nf_log/0 NONE v2: add missed error check for proc_dostring Signed-off-by: Pavel Tikhomirov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_log.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 18e325ce6542..aa5847a16713 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -418,16 +418,17 @@ static int nf_log_proc_dostring(struct ctl_table *table, int write, { const struct nf_logger *logger; char buf[NFLOGGER_NAME_LEN]; - size_t size = *lenp; int r = 0; int tindex = (unsigned long)table->extra1; struct net *net = current->nsproxy->net_ns; if (write) { - if (size > sizeof(buf)) - size = sizeof(buf); - if (copy_from_user(buf, buffer, size)) - return -EFAULT; + struct ctl_table tmp = *table; + + tmp.data = buf; + r = proc_dostring(&tmp, write, buffer, lenp, ppos); + if (r) + return r; if (!strcmp(buf, "NONE")) { nf_log_unbind_pf(net, tindex); -- cgit v1.2.3 From be2cef49904b34dd5f75d96bbc8cd8341bab1bc0 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Fri, 3 Jun 2016 17:56:50 +0200 Subject: ipvs: count pre-established TCP states as active Some users observed that "least connection" distribution algorithm doesn't handle well bursts of TCP connections from reconnecting clients after a node or network failure. This is because the algorithm counts active connection as worth 256 inactive ones where for TCP, "active" only means TCP connections in ESTABLISHED state. In case of a connection burst, new connections are handled before previous ones have finished the three way handshaking so that all are still counted as "inactive", i.e. cheap ones. The become "active" quickly but at that time, all of them are already assigned to one real server (or few), resulting in highly unbalanced distribution. Address this by counting the "pre-established" states as "active". Signed-off-by: Michal Kubecek Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_proto_tcp.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index d7024b2ed769..5117bcb7d2f0 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -395,6 +395,20 @@ static const char *const tcp_state_name_table[IP_VS_TCP_S_LAST+1] = { [IP_VS_TCP_S_LAST] = "BUG!", }; +static const bool tcp_state_active_table[IP_VS_TCP_S_LAST] = { + [IP_VS_TCP_S_NONE] = false, + [IP_VS_TCP_S_ESTABLISHED] = true, + [IP_VS_TCP_S_SYN_SENT] = true, + [IP_VS_TCP_S_SYN_RECV] = true, + [IP_VS_TCP_S_FIN_WAIT] = false, + [IP_VS_TCP_S_TIME_WAIT] = false, + [IP_VS_TCP_S_CLOSE] = false, + [IP_VS_TCP_S_CLOSE_WAIT] = false, + [IP_VS_TCP_S_LAST_ACK] = false, + [IP_VS_TCP_S_LISTEN] = false, + [IP_VS_TCP_S_SYNACK] = true, +}; + #define sNO IP_VS_TCP_S_NONE #define sES IP_VS_TCP_S_ESTABLISHED #define sSS IP_VS_TCP_S_SYN_SENT @@ -418,6 +432,13 @@ static const char * tcp_state_name(int state) return tcp_state_name_table[state] ? tcp_state_name_table[state] : "?"; } +static bool tcp_state_active(int state) +{ + if (state >= IP_VS_TCP_S_LAST) + return false; + return tcp_state_active_table[state]; +} + static struct tcp_states_t tcp_states [] = { /* INPUT */ /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */ @@ -540,12 +561,12 @@ set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, if (dest) { if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && - (new_state != IP_VS_TCP_S_ESTABLISHED)) { + !tcp_state_active(new_state)) { atomic_dec(&dest->activeconns); atomic_inc(&dest->inactconns); cp->flags |= IP_VS_CONN_F_INACTIVE; } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) && - (new_state == IP_VS_TCP_S_ESTABLISHED)) { + tcp_state_active(new_state)) { atomic_inc(&dest->activeconns); atomic_dec(&dest->inactconns); cp->flags &= ~IP_VS_CONN_F_INACTIVE; -- cgit v1.2.3 From 64b87639c9cbeb03e26bc65528416c961b1dde96 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sun, 3 Jul 2016 13:18:43 +0800 Subject: netfilter: conntrack: fix race between nf_conntrack proc read and hash resize MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we do "cat /proc/net/nf_conntrack", and meanwhile resize the conntrack hash table via /sys/module/nf_conntrack/parameters/hashsize, race will happen, because reader can observe a newly allocated hash but the old size (or vice versa). So oops will happen like follows: BUG: unable to handle kernel NULL pointer dereference at 0000000000000017 IP: [] seq_print_acct+0x11/0x50 [nf_conntrack] Call Trace: [] ? ct_seq_show+0x14e/0x340 [nf_conntrack] [] seq_read+0x2cc/0x390 [] proc_reg_read+0x42/0x70 [] __vfs_read+0x37/0x130 [] ? security_file_permission+0xa0/0xc0 [] vfs_read+0x95/0x140 [] SyS_read+0x55/0xc0 [] entry_SYSCALL_64_fastpath+0x1a/0xa4 It is very easy to reproduce this kernel crash. 1. open one shell and input the following cmds: while : ; do echo $RANDOM > /sys/module/nf_conntrack/parameters/hashsize done 2. open more shells and input the following cmds: while : ; do cat /proc/net/nf_conntrack done 3. just wait a monent, oops will happen soon. The solution in this patch is based on Florian's Commit 5e3c61f98175 ("netfilter: conntrack: fix lookup race during hash resize"). And add a wrapper function nf_conntrack_get_ht to get hash and hsize suggested by Florian Westphal. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_core.h | 2 ++ net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | 14 ++++++++++---- net/netfilter/nf_conntrack_core.c | 17 +++++++++++++++++ net/netfilter/nf_conntrack_standalone.c | 14 +++++++++----- 4 files changed, 38 insertions(+), 9 deletions(-) (limited to 'net/netfilter') diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 3e2f3328945c..79d7ac5c9740 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -51,6 +51,8 @@ bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, const struct nf_conntrack_l3proto *l3proto, const struct nf_conntrack_l4proto *l4proto); +void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize); + /* Find a connection corresponding to a tuple. */ struct nf_conntrack_tuple_hash * nf_conntrack_find_get(struct net *net, diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index c6f3c406f707..63923710f325 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -26,6 +26,8 @@ struct ct_iter_state { struct seq_net_private p; + struct hlist_nulls_head *hash; + unsigned int htable_size; unsigned int bucket; }; @@ -35,10 +37,10 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) struct hlist_nulls_node *n; for (st->bucket = 0; - st->bucket < nf_conntrack_htable_size; + st->bucket < st->htable_size; st->bucket++) { n = rcu_dereference( - hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket])); + hlist_nulls_first_rcu(&st->hash[st->bucket])); if (!is_a_nulls(n)) return n; } @@ -53,11 +55,11 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, head = rcu_dereference(hlist_nulls_next_rcu(head)); while (is_a_nulls(head)) { if (likely(get_nulls_value(head) == st->bucket)) { - if (++st->bucket >= nf_conntrack_htable_size) + if (++st->bucket >= st->htable_size) return NULL; } head = rcu_dereference( - hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket])); + hlist_nulls_first_rcu(&st->hash[st->bucket])); } return head; } @@ -75,7 +77,11 @@ static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos) static void *ct_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { + struct ct_iter_state *st = seq->private; + rcu_read_lock(); + + nf_conntrack_get_ht(&st->hash, &st->htable_size); return ct_get_idx(seq, *pos); } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 153e33ffeeaa..1289e7e5e0de 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -460,6 +460,23 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, net_eq(net, nf_ct_net(ct)); } +/* must be called with rcu read lock held */ +void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize) +{ + struct hlist_nulls_head *hptr; + unsigned int sequence, hsz; + + do { + sequence = read_seqcount_begin(&nf_conntrack_generation); + hsz = nf_conntrack_htable_size; + hptr = nf_conntrack_hash; + } while (read_seqcount_retry(&nf_conntrack_generation, sequence)); + + *hash = hptr; + *hsize = hsz; +} +EXPORT_SYMBOL_GPL(nf_conntrack_get_ht); + /* * Warning : * - Caller must take a reference on returned object diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 2aaa188ee961..958a1455ca7f 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -48,6 +48,8 @@ EXPORT_SYMBOL_GPL(print_tuple); struct ct_iter_state { struct seq_net_private p; + struct hlist_nulls_head *hash; + unsigned int htable_size; unsigned int bucket; u_int64_t time_now; }; @@ -58,9 +60,10 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) struct hlist_nulls_node *n; for (st->bucket = 0; - st->bucket < nf_conntrack_htable_size; + st->bucket < st->htable_size; st->bucket++) { - n = rcu_dereference(hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket])); + n = rcu_dereference( + hlist_nulls_first_rcu(&st->hash[st->bucket])); if (!is_a_nulls(n)) return n; } @@ -75,12 +78,11 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, head = rcu_dereference(hlist_nulls_next_rcu(head)); while (is_a_nulls(head)) { if (likely(get_nulls_value(head) == st->bucket)) { - if (++st->bucket >= nf_conntrack_htable_size) + if (++st->bucket >= st->htable_size) return NULL; } head = rcu_dereference( - hlist_nulls_first_rcu( - &nf_conntrack_hash[st->bucket])); + hlist_nulls_first_rcu(&st->hash[st->bucket])); } return head; } @@ -102,6 +104,8 @@ static void *ct_seq_start(struct seq_file *seq, loff_t *pos) st->time_now = ktime_get_real_ns(); rcu_read_lock(); + + nf_conntrack_get_ht(&st->hash, &st->htable_size); return ct_get_idx(seq, *pos); } -- cgit v1.2.3 From 474803d37e7fb6291d22cb964014afe457ba5212 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sun, 3 Jul 2016 13:18:44 +0800 Subject: netfilter: cttimeout: unlink timeout obj again when hash resize happen Imagine such situation, nf_conntrack_htable_size now is 4096, we are doing ctnl_untimeout, and iterate on 3000# bucket. Meanwhile, another user try to reduce hash size to 2048, then all nf_conn are removed to the new hashtable. When this hash resize operation finished, we still try to itreate ct begin from 3000# bucket, find nothing to do and just return. We may miss unlinking some timeout objects. And later we will end up with invalid references to timeout object that are already gone. So when we find that hash resize happened, try to unlink timeout objects from the 0# bucket again. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_cttimeout.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 3c84f14326f5..4cdcd969b64c 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -303,16 +303,24 @@ static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout) { struct nf_conntrack_tuple_hash *h; const struct hlist_nulls_node *nn; + unsigned int last_hsize; + spinlock_t *lock; int i; local_bh_disable(); - for (i = 0; i < nf_conntrack_htable_size; i++) { - nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); - if (i < nf_conntrack_htable_size) { - hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode) - untimeout(h, timeout); +restart: + last_hsize = nf_conntrack_htable_size; + for (i = 0; i < last_hsize; i++) { + lock = &nf_conntrack_locks[i % CONNTRACK_LOCKS]; + nf_conntrack_lock(lock); + if (last_hsize != nf_conntrack_htable_size) { + spin_unlock(lock); + goto restart; } - spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); + + hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode) + untimeout(h, timeout); + spin_unlock(lock); } local_bh_enable(); } -- cgit v1.2.3 From 8786a9716d028083f56f944996883f7d1a05919e Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sun, 3 Jul 2016 13:18:45 +0800 Subject: netfilter: nf_ct_helper: unlink helper again when hash resize happen From: Liping Zhang Similar to ctnl_untimeout, when hash resize happened, we should try to do unhelp from the 0# bucket again. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_helper.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 3a1a88b9bafa..a4294e949cdc 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -409,6 +409,8 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) struct nf_conntrack_expect *exp; const struct hlist_node *next; const struct hlist_nulls_node *nn; + unsigned int last_hsize; + spinlock_t *lock; struct net *net; unsigned int i; @@ -446,13 +448,18 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) rtnl_unlock(); local_bh_disable(); - for (i = 0; i < nf_conntrack_htable_size; i++) { - nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); - if (i < nf_conntrack_htable_size) { - hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode) - unhelp(h, me); +restart: + last_hsize = nf_conntrack_htable_size; + for (i = 0; i < last_hsize; i++) { + lock = &nf_conntrack_locks[i % CONNTRACK_LOCKS]; + nf_conntrack_lock(lock); + if (last_hsize != nf_conntrack_htable_size) { + spin_unlock(lock); + goto restart; } - spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); + hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode) + unhelp(h, me); + spin_unlock(lock); } local_bh_enable(); } -- cgit v1.2.3 From 242922a027176cd260c5adce4ba6bbfa3a05190c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 3 Jul 2016 20:44:01 +0200 Subject: netfilter: conntrack: simplify early_drop We don't need to acquire the bucket lock during early drop, we can use lockless traveral just like ____nf_conntrack_find. The timer deletion serves as synchronization point, if another cpu attempts to evict same entry, only one will succeed with timer deletion. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 1 + net/netfilter/nf_conntrack_core.c | 95 ++++++++++++++++++------------------ 2 files changed, 48 insertions(+), 48 deletions(-) (limited to 'net/netfilter') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 5d3397f34583..2a5133e214c9 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -301,6 +301,7 @@ void nf_ct_tmpl_free(struct nf_conn *tmpl); #define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count) #define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count) +#define NF_CT_STAT_ADD_ATOMIC(net, count, v) this_cpu_add((net)->ct.stat->count, (v)) #define MODULE_ALIAS_NFCT_HELPER(helper) \ MODULE_ALIAS("nfct-helper-" helper) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 1289e7e5e0de..e0e9c9a0f5ba 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -834,67 +834,66 @@ EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken); /* There's a small race here where we may free a just-assured connection. Too bad: we're in trouble anyway. */ -static noinline int early_drop(struct net *net, unsigned int _hash) +static unsigned int early_drop_list(struct net *net, + struct hlist_nulls_head *head) { - /* Use oldest entry, which is roughly LRU */ struct nf_conntrack_tuple_hash *h; - struct nf_conn *tmp; struct hlist_nulls_node *n; - unsigned int i, hash, sequence; - struct nf_conn *ct = NULL; - spinlock_t *lockp; - bool ret = false; + unsigned int drops = 0; + struct nf_conn *tmp; - i = 0; + hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) { + tmp = nf_ct_tuplehash_to_ctrack(h); - local_bh_disable(); -restart: - sequence = read_seqcount_begin(&nf_conntrack_generation); - for (; i < NF_CT_EVICTION_RANGE; i++) { - hash = scale_hash(_hash++); - lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS]; - nf_conntrack_lock(lockp); - if (read_seqcount_retry(&nf_conntrack_generation, sequence)) { - spin_unlock(lockp); - goto restart; - } - hlist_nulls_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash], - hnnode) { - tmp = nf_ct_tuplehash_to_ctrack(h); - - if (test_bit(IPS_ASSURED_BIT, &tmp->status) || - !net_eq(nf_ct_net(tmp), net) || - nf_ct_is_dying(tmp)) - continue; - - if (atomic_inc_not_zero(&tmp->ct_general.use)) { - ct = tmp; - break; - } - } + if (test_bit(IPS_ASSURED_BIT, &tmp->status) || + !net_eq(nf_ct_net(tmp), net) || + nf_ct_is_dying(tmp)) + continue; - spin_unlock(lockp); - if (ct) - break; + if (!atomic_inc_not_zero(&tmp->ct_general.use)) + continue; + + /* kill only if still in same netns -- might have moved due to + * SLAB_DESTROY_BY_RCU rules. + * + * We steal the timer reference. If that fails timer has + * already fired or someone else deleted it. Just drop ref + * and move to next entry. + */ + if (net_eq(nf_ct_net(tmp), net) && + nf_ct_is_confirmed(tmp) && + del_timer(&tmp->timeout) && + nf_ct_delete(tmp, 0, 0)) + drops++; + + nf_ct_put(tmp); } - local_bh_enable(); + return drops; +} - if (!ct) - return false; +static noinline int early_drop(struct net *net, unsigned int _hash) +{ + unsigned int i; - /* kill only if in same netns -- might have moved due to - * SLAB_DESTROY_BY_RCU rules - */ - if (net_eq(nf_ct_net(ct), net) && del_timer(&ct->timeout)) { - if (nf_ct_delete(ct, 0, 0)) { - NF_CT_STAT_INC_ATOMIC(net, early_drop); - ret = true; + for (i = 0; i < NF_CT_EVICTION_RANGE; i++) { + struct hlist_nulls_head *ct_hash; + unsigned hash, sequence, drops; + + do { + sequence = read_seqcount_begin(&nf_conntrack_generation); + hash = scale_hash(_hash++); + ct_hash = nf_conntrack_hash; + } while (read_seqcount_retry(&nf_conntrack_generation, sequence)); + + drops = early_drop_list(net, &ct_hash[hash]); + if (drops) { + NF_CT_STAT_ADD_ATOMIC(net, early_drop, drops); + return true; } } - nf_ct_put(ct); - return ret; + return false; } static struct nf_conn * -- cgit v1.2.3 From 7c9664351980aaa6a4b8837a314360b3a4ad382a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 5 Jul 2016 12:07:23 +0200 Subject: netfilter: move nat hlist_head to nf_conn The nat extension structure is 32bytes in size on x86_64: struct nf_conn_nat { struct hlist_node bysource; /* 0 16 */ struct nf_conn * ct; /* 16 8 */ union nf_conntrack_nat_help help; /* 24 4 */ int masq_index; /* 28 4 */ /* size: 32, cachelines: 1, members: 4 */ /* last cacheline: 32 bytes */ }; The hlist is needed to quickly check for possible tuple collisions when installing a new nat binding. Storing this in the extension area has two drawbacks: 1. We need ct backpointer to get the conntrack struct from the extension. 2. When reallocation of extension area occurs we need to fixup the bysource hash head via hlist_replace_rcu. We can avoid both by placing the hlist_head in nf_conn and place nf_conn in the bysource hash rather than the extenstion. We can also remove the ->move support; no other extension needs it. Moving the entire nat extension into nf_conn would be possible as well but then we have to add yet another callback for deletion from the bysource hash table rather than just using nat extension ->destroy hook for this. nf_conn size doesn't increase due to aligment, followup patch replaces hlist_node with single pointer. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 3 +++ include/net/netfilter/nf_conntrack_extend.h | 3 --- include/net/netfilter/nf_nat.h | 2 -- net/netfilter/nf_conntrack_extend.c | 15 ++----------- net/netfilter/nf_nat_core.c | 33 ++++++----------------------- 5 files changed, 12 insertions(+), 44 deletions(-) (limited to 'net/netfilter') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 2a5133e214c9..e5135d8728b4 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -117,6 +117,9 @@ struct nf_conn { /* Extensions */ struct nf_ct_ext *ext; +#if IS_ENABLED(CONFIG_NF_NAT) + struct hlist_node nat_bysource; +#endif /* Storage reserved for other modules, must be the last member */ union nf_conntrack_proto proto; }; diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index b925395fa5ed..1c3035dda31f 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -99,9 +99,6 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id, struct nf_ct_ext_type { /* Destroys relationships (can be NULL). */ void (*destroy)(struct nf_conn *ct); - /* Called when realloacted (can be NULL). - Contents has already been moved. */ - void (*move)(void *new, void *old); enum nf_ct_ext_id id; diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index 344b1ab19220..02515f7ed4cc 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -29,8 +29,6 @@ struct nf_conn; /* The structure embedded in the conntrack structure. */ struct nf_conn_nat { - struct hlist_node bysource; - struct nf_conn *ct; union nf_conntrack_nat_help help; #if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV4) || \ IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV6) diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 1a9545965c0d..02bcf00c2492 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -73,7 +73,7 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id, size_t var_alloc_len, gfp_t gfp) { struct nf_ct_ext *old, *new; - int i, newlen, newoff; + int newlen, newoff; struct nf_ct_ext_type *t; /* Conntrack must not be confirmed to avoid races on reallocation. */ @@ -99,19 +99,8 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id, return NULL; if (new != old) { - for (i = 0; i < NF_CT_EXT_NUM; i++) { - if (!__nf_ct_ext_exist(old, i)) - continue; - - rcu_read_lock(); - t = rcu_dereference(nf_ct_ext_types[i]); - if (t && t->move) - t->move((void *)new + new->offset[i], - (void *)old + old->offset[i]); - rcu_read_unlock(); - } kfree_rcu(old, rcu); - ct->ext = new; + rcu_assign_pointer(ct->ext, new); } new->offset[id] = newoff; diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 6877a396f8fc..692534701426 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -198,11 +198,9 @@ find_appropriate_src(struct net *net, const struct nf_nat_range *range) { unsigned int h = hash_by_src(net, tuple); - const struct nf_conn_nat *nat; const struct nf_conn *ct; - hlist_for_each_entry_rcu(nat, &nf_nat_bysource[h], bysource) { - ct = nat->ct; + hlist_for_each_entry_rcu(ct, &nf_nat_bysource[h], nat_bysource) { if (same_src(ct, tuple) && net_eq(net, nf_ct_net(ct)) && nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) { @@ -435,8 +433,7 @@ nf_nat_setup_info(struct nf_conn *ct, spin_lock_bh(&nf_nat_lock); /* nf_conntrack_alter_reply might re-allocate extension aera */ nat = nfct_nat(ct); - nat->ct = ct; - hlist_add_head_rcu(&nat->bysource, + hlist_add_head_rcu(&ct->nat_bysource, &nf_nat_bysource[srchash]); spin_unlock_bh(&nf_nat_lock); } @@ -543,7 +540,7 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data) if (nf_nat_proto_remove(ct, data)) return 1; - if (!nat || !nat->ct) + if (!nat) return 0; /* This netns is being destroyed, and conntrack has nat null binding. @@ -556,9 +553,8 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data) return 1; spin_lock_bh(&nf_nat_lock); - hlist_del_rcu(&nat->bysource); + hlist_del_rcu(&ct->nat_bysource); ct->status &= ~IPS_NAT_DONE_MASK; - nat->ct = NULL; spin_unlock_bh(&nf_nat_lock); add_timer(&ct->timeout); @@ -688,27 +684,13 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct) { struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT); - if (nat == NULL || nat->ct == NULL) + if (!nat) return; - NF_CT_ASSERT(nat->ct->status & IPS_SRC_NAT_DONE); - - spin_lock_bh(&nf_nat_lock); - hlist_del_rcu(&nat->bysource); - spin_unlock_bh(&nf_nat_lock); -} - -static void nf_nat_move_storage(void *new, void *old) -{ - struct nf_conn_nat *new_nat = new; - struct nf_conn_nat *old_nat = old; - struct nf_conn *ct = old_nat->ct; - - if (!ct || !(ct->status & IPS_SRC_NAT_DONE)) - return; + NF_CT_ASSERT(ct->status & IPS_SRC_NAT_DONE); spin_lock_bh(&nf_nat_lock); - hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource); + hlist_del_rcu(&ct->nat_bysource); spin_unlock_bh(&nf_nat_lock); } @@ -716,7 +698,6 @@ static struct nf_ct_ext_type nat_extend __read_mostly = { .len = sizeof(struct nf_conn_nat), .align = __alignof__(struct nf_conn_nat), .destroy = nf_nat_cleanup_conntrack, - .move = nf_nat_move_storage, .id = NF_CT_EXT_NAT, .flags = NF_CT_EXT_F_PREALLOC, }; -- cgit v1.2.3 From 870190a9ec9075205c0fa795a09fa931694a3ff1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 5 Jul 2016 12:07:24 +0200 Subject: netfilter: nat: convert nat bysrc hash to rhashtable It did use a fixed-size bucket list plus single lock to protect add/del. Unlike the main conntrack table we only need to add and remove keys. Convert it to rhashtable to get table autosizing and per-bucket locking. The maximum number of entries is -- as before -- tied to the number of conntracks so we do not need another upperlimit. The change does not handle rhashtable_remove_fast error, only possible "error" is -ENOENT, and that is something that can happen legitimetely, e.g. because nat module was inserted at a later time and no src manip took place yet. Tested with http-client-benchmark + httpterm with DNAT and SNAT rules in place. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 3 +- include/net/netfilter/nf_nat.h | 1 + net/netfilter/nf_nat_core.c | 126 +++++++++++++++++++---------------- 3 files changed, 71 insertions(+), 59 deletions(-) (limited to 'net/netfilter') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index e5135d8728b4..a08825b7e955 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -118,7 +119,7 @@ struct nf_conn { struct nf_ct_ext *ext; #if IS_ENABLED(CONFIG_NF_NAT) - struct hlist_node nat_bysource; + struct rhash_head nat_bysource; #endif /* Storage reserved for other modules, must be the last member */ union nf_conntrack_proto proto; diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index 02515f7ed4cc..c327a431a6f3 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -1,5 +1,6 @@ #ifndef _NF_NAT_H #define _NF_NAT_H +#include #include #include #include diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 692534701426..de31818417b8 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -30,17 +30,19 @@ #include #include -static DEFINE_SPINLOCK(nf_nat_lock); - static DEFINE_MUTEX(nf_nat_proto_mutex); static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO] __read_mostly; static const struct nf_nat_l4proto __rcu **nf_nat_l4protos[NFPROTO_NUMPROTO] __read_mostly; -static struct hlist_head *nf_nat_bysource __read_mostly; -static unsigned int nf_nat_htable_size __read_mostly; -static unsigned int nf_nat_hash_rnd __read_mostly; +struct nf_nat_conn_key { + const struct net *net; + const struct nf_conntrack_tuple *tuple; + const struct nf_conntrack_zone *zone; +}; + +static struct rhashtable nf_nat_bysource_table; inline const struct nf_nat_l3proto * __nf_nat_l3proto_find(u8 family) @@ -119,19 +121,17 @@ int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family) EXPORT_SYMBOL(nf_xfrm_me_harder); #endif /* CONFIG_XFRM */ -/* We keep an extra hash for each conntrack, for fast searching. */ -static inline unsigned int -hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple) +static u32 nf_nat_bysource_hash(const void *data, u32 len, u32 seed) { - unsigned int hash; - - get_random_once(&nf_nat_hash_rnd, sizeof(nf_nat_hash_rnd)); + const struct nf_conntrack_tuple *t; + const struct nf_conn *ct = data; + t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; /* Original src, to ensure we map it consistently if poss. */ - hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32), - tuple->dst.protonum ^ nf_nat_hash_rnd ^ net_hash_mix(n)); - return reciprocal_scale(hash, nf_nat_htable_size); + seed ^= net_hash_mix(nf_ct_net(ct)); + return jhash2((const u32 *)&t->src, sizeof(t->src) / sizeof(u32), + t->dst.protonum ^ seed); } /* Is this tuple already taken? (not by us) */ @@ -187,6 +187,26 @@ same_src(const struct nf_conn *ct, t->src.u.all == tuple->src.u.all); } +static int nf_nat_bysource_cmp(struct rhashtable_compare_arg *arg, + const void *obj) +{ + const struct nf_nat_conn_key *key = arg->key; + const struct nf_conn *ct = obj; + + return same_src(ct, key->tuple) && + net_eq(nf_ct_net(ct), key->net) && + nf_ct_zone_equal(ct, key->zone, IP_CT_DIR_ORIGINAL); +} + +static struct rhashtable_params nf_nat_bysource_params = { + .head_offset = offsetof(struct nf_conn, nat_bysource), + .obj_hashfn = nf_nat_bysource_hash, + .obj_cmpfn = nf_nat_bysource_cmp, + .nelem_hint = 256, + .min_size = 1024, + .nulls_base = (1U << RHT_BASE_SHIFT), +}; + /* Only called for SRC manip */ static int find_appropriate_src(struct net *net, @@ -197,23 +217,23 @@ find_appropriate_src(struct net *net, struct nf_conntrack_tuple *result, const struct nf_nat_range *range) { - unsigned int h = hash_by_src(net, tuple); const struct nf_conn *ct; + struct nf_nat_conn_key key = { + .net = net, + .tuple = tuple, + .zone = zone + }; - hlist_for_each_entry_rcu(ct, &nf_nat_bysource[h], nat_bysource) { - if (same_src(ct, tuple) && - net_eq(net, nf_ct_net(ct)) && - nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) { - /* Copy source part from reply tuple. */ - nf_ct_invert_tuplepr(result, - &ct->tuplehash[IP_CT_DIR_REPLY].tuple); - result->dst = tuple->dst; - - if (in_range(l3proto, l4proto, result, range)) - return 1; - } - } - return 0; + ct = rhashtable_lookup_fast(&nf_nat_bysource_table, &key, + nf_nat_bysource_params); + if (!ct) + return 0; + + nf_ct_invert_tuplepr(result, + &ct->tuplehash[IP_CT_DIR_REPLY].tuple); + result->dst = tuple->dst; + + return in_range(l3proto, l4proto, result, range); } /* For [FUTURE] fragmentation handling, we want the least-used @@ -385,7 +405,6 @@ nf_nat_setup_info(struct nf_conn *ct, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype) { - struct net *net = nf_ct_net(ct); struct nf_conntrack_tuple curr_tuple, new_tuple; struct nf_conn_nat *nat; @@ -426,16 +445,13 @@ nf_nat_setup_info(struct nf_conn *ct, } if (maniptype == NF_NAT_MANIP_SRC) { - unsigned int srchash; - - srchash = hash_by_src(net, - &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - spin_lock_bh(&nf_nat_lock); - /* nf_conntrack_alter_reply might re-allocate extension aera */ - nat = nfct_nat(ct); - hlist_add_head_rcu(&ct->nat_bysource, - &nf_nat_bysource[srchash]); - spin_unlock_bh(&nf_nat_lock); + int err; + + err = rhashtable_insert_fast(&nf_nat_bysource_table, + &ct->nat_bysource, + nf_nat_bysource_params); + if (err) + return NF_DROP; } /* It's done. */ @@ -552,10 +568,10 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data) if (!del_timer(&ct->timeout)) return 1; - spin_lock_bh(&nf_nat_lock); - hlist_del_rcu(&ct->nat_bysource); ct->status &= ~IPS_NAT_DONE_MASK; - spin_unlock_bh(&nf_nat_lock); + + rhashtable_remove_fast(&nf_nat_bysource_table, &ct->nat_bysource, + nf_nat_bysource_params); add_timer(&ct->timeout); @@ -687,11 +703,8 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct) if (!nat) return; - NF_CT_ASSERT(ct->status & IPS_SRC_NAT_DONE); - - spin_lock_bh(&nf_nat_lock); - hlist_del_rcu(&ct->nat_bysource); - spin_unlock_bh(&nf_nat_lock); + rhashtable_remove_fast(&nf_nat_bysource_table, &ct->nat_bysource, + nf_nat_bysource_params); } static struct nf_ct_ext_type nat_extend __read_mostly = { @@ -826,16 +839,13 @@ static int __init nf_nat_init(void) { int ret; - /* Leave them the same for the moment. */ - nf_nat_htable_size = nf_conntrack_htable_size; - - nf_nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, 0); - if (!nf_nat_bysource) - return -ENOMEM; + ret = rhashtable_init(&nf_nat_bysource_table, &nf_nat_bysource_params); + if (ret) + return ret; ret = nf_ct_extend_register(&nat_extend); if (ret < 0) { - nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size); + rhashtable_destroy(&nf_nat_bysource_table); printk(KERN_ERR "nf_nat_core: Unable to register extension\n"); return ret; } @@ -859,7 +869,7 @@ static int __init nf_nat_init(void) return 0; cleanup_extend: - nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size); + rhashtable_destroy(&nf_nat_bysource_table); nf_ct_extend_unregister(&nat_extend); return ret; } @@ -877,8 +887,8 @@ static void __exit nf_nat_cleanup(void) #endif for (i = 0; i < NFPROTO_NUMPROTO; i++) kfree(nf_nat_l4protos[i]); - synchronize_net(); - nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size); + + rhashtable_destroy(&nf_nat_bysource_table); } MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 47c74456257d2e50ace8c473d358cf4b9c0a912f Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 5 Jul 2016 20:55:36 +0800 Subject: netfilter: physdev: physdev-is-out should not work with OUTPUT chain physdev_mt() will check skb->nf_bridge first, which was alloced in br_nf_pre_routing. So if we want to use --physdev-out and physdev-is-out, we need to match it in FORWARD or POSTROUTING chain. physdev_mt_check() only checked physdev-out and missed physdev-is-out. Fix it and update the debug message to make it clearer. Signed-off-by: Hangbin Liu Reviewed-by: Marcelo R Leitner Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_physdev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index 1caaccbc306c..e5f18988aee0 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -102,14 +102,14 @@ static int physdev_mt_check(const struct xt_mtchk_param *par) if (!(info->bitmask & XT_PHYSDEV_OP_MASK) || info->bitmask & ~XT_PHYSDEV_OP_MASK) return -EINVAL; - if (info->bitmask & XT_PHYSDEV_OP_OUT && + if (info->bitmask & (XT_PHYSDEV_OP_OUT | XT_PHYSDEV_OP_ISOUT) && (!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) || info->invert & XT_PHYSDEV_OP_BRIDGED) && par->hook_mask & ((1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) | (1 << NF_INET_POST_ROUTING))) { - pr_info("using --physdev-out in the OUTPUT, FORWARD and " - "POSTROUTING chains for non-bridged traffic is not " - "supported anymore.\n"); + pr_info("using --physdev-out and --physdev-is-out are only" + "supported in the FORWARD and POSTROUTING chains with" + "bridged traffic.\n"); if (par->hook_mask & (1 << NF_INET_LOCAL_OUT)) return -EINVAL; } -- cgit v1.2.3 From 3f8b61b7f9aea414d162821817d89a7a6aae41c3 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Tue, 5 Jul 2016 23:23:00 +0800 Subject: netfilter: nft_ct: make byte/packet expr more friendly If we want to use ct packets expr, and add a rule like follows: # nft add rule filter input ct packets gt 1 counter We will find that no packets will hit it, because nf_conntrack_acct is disabled by default. So It will not work until we enable it manually via "echo 1 > /proc/sys/net/netfilter/nf_conntrack_acct". This is not friendly, so like xt_connbytes do, if the user want to use ct byte/packet expr, enable nf_conntrack_acct automatically. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_ct.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/netfilter') diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 137e308d5b24..7ce8fd7ace78 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -355,6 +355,9 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, if (err < 0) return err; + if (priv->key == NFT_CT_BYTES || priv->key == NFT_CT_PKTS) + nf_ct_set_acct(ctx->net, true); + return 0; } -- cgit v1.2.3 From 42a55769132fdf4f44bac1471b371d7f80bcde35 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 8 Jul 2016 14:41:49 +0200 Subject: netfilter: nf_tables: get rid of possible_net_t from set and basechain We can pass the netns pointer as parameter to the functions that need to gain access to it. From basechains, I didn't find any client for this field anymore so let's remove this too. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 21 +++++++++++---------- net/netfilter/nf_tables_api.c | 10 ++++------ net/netfilter/nft_hash.c | 20 ++++++++++---------- net/netfilter/nft_lookup.c | 2 +- net/netfilter/nft_rbtree.c | 26 ++++++++++++++------------ 5 files changed, 40 insertions(+), 39 deletions(-) (limited to 'net/netfilter') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 30c1d9489ae2..f2f13399ce44 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -236,7 +236,8 @@ struct nft_expr; * @features: features supported by the implementation */ struct nft_set_ops { - bool (*lookup)(const struct nft_set *set, + bool (*lookup)(const struct net *net, + const struct nft_set *set, const u32 *key, const struct nft_set_ext **ext); bool (*update)(struct nft_set *set, @@ -248,11 +249,14 @@ struct nft_set_ops { struct nft_regs *regs, const struct nft_set_ext **ext); - int (*insert)(const struct nft_set *set, + int (*insert)(const struct net *net, + const struct nft_set *set, const struct nft_set_elem *elem); - void (*activate)(const struct nft_set *set, + void (*activate)(const struct net *net, + const struct nft_set *set, const struct nft_set_elem *elem); - void * (*deactivate)(const struct nft_set *set, + void * (*deactivate)(const struct net *net, + const struct nft_set *set, const struct nft_set_elem *elem); void (*remove)(const struct nft_set *set, const struct nft_set_elem *elem); @@ -295,7 +299,6 @@ void nft_unregister_set(struct nft_set_ops *ops); * @udlen: user data length * @udata: user data * @ops: set ops - * @pnet: network namespace * @flags: set flags * @genmask: generation mask * @klen: key length @@ -318,7 +321,6 @@ struct nft_set { unsigned char *udata; /* runtime data below here */ const struct nft_set_ops *ops ____cacheline_aligned; - possible_net_t pnet; u16 flags:14, genmask:2; u8 klen; @@ -804,7 +806,6 @@ struct nft_stats { * struct nft_base_chain - nf_tables base chain * * @ops: netfilter hook ops - * @pnet: net namespace that this chain belongs to * @type: chain type * @policy: default policy * @stats: per-cpu chain stats @@ -813,7 +814,6 @@ struct nft_stats { */ struct nft_base_chain { struct nf_hook_ops ops[NFT_HOOK_OPS_MAX]; - possible_net_t pnet; const struct nf_chain_type *type; u8 policy; u8 flags; @@ -1009,10 +1009,11 @@ static inline bool nft_set_elem_active(const struct nft_set_ext *ext, return !(ext->genmask & genmask); } -static inline void nft_set_elem_change_active(const struct nft_set *set, +static inline void nft_set_elem_change_active(const struct net *net, + const struct nft_set *set, struct nft_set_ext *ext) { - ext->genmask ^= nft_genmask_next(read_pnet(&set->pnet)); + ext->genmask ^= nft_genmask_next(net); } /* diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 18b7f8578ee0..0211eaec9060 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1405,7 +1405,6 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, rcu_assign_pointer(basechain->stats, stats); } - write_pnet(&basechain->pnet, net); basechain->type = type; chain = &basechain->chain; @@ -2841,7 +2840,6 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, } INIT_LIST_HEAD(&set->bindings); - write_pnet(&set->pnet, net); set->ops = ops; set->ktype = ktype; set->klen = desc.klen; @@ -3520,7 +3518,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, goto err4; ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK; - err = set->ops->insert(set, &elem); + err = set->ops->insert(ctx->net, set, &elem); if (err < 0) goto err5; @@ -3644,7 +3642,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, goto err3; } - priv = set->ops->deactivate(set, &elem); + priv = set->ops->deactivate(ctx->net, set, &elem); if (priv == NULL) { err = -ENOENT; goto err4; @@ -4018,7 +4016,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) case NFT_MSG_NEWSETELEM: te = (struct nft_trans_elem *)trans->data; - te->set->ops->activate(te->set, &te->elem); + te->set->ops->activate(net, te->set, &te->elem); nf_tables_setelem_notify(&trans->ctx, te->set, &te->elem, NFT_MSG_NEWSETELEM, 0); @@ -4143,7 +4141,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) case NFT_MSG_DELSETELEM: te = (struct nft_trans_elem *)trans->data; - te->set->ops->activate(te->set, &te->elem); + te->set->ops->activate(net, te->set, &te->elem); te->set->ndeact--; nft_trans_destroy(trans); diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index ea924816b7b8..564fa7929ed5 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -71,13 +71,13 @@ static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg, return 0; } -static bool nft_hash_lookup(const struct nft_set *set, const u32 *key, - const struct nft_set_ext **ext) +static bool nft_hash_lookup(const struct net *net, const struct nft_set *set, + const u32 *key, const struct nft_set_ext **ext) { struct nft_hash *priv = nft_set_priv(set); const struct nft_hash_elem *he; struct nft_hash_cmp_arg arg = { - .genmask = nft_genmask_cur(read_pnet(&set->pnet)), + .genmask = nft_genmask_cur(net), .set = set, .key = key, }; @@ -125,13 +125,13 @@ err1: return false; } -static int nft_hash_insert(const struct nft_set *set, +static int nft_hash_insert(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) { struct nft_hash *priv = nft_set_priv(set); struct nft_hash_elem *he = elem->priv; struct nft_hash_cmp_arg arg = { - .genmask = nft_genmask_next(read_pnet(&set->pnet)), + .genmask = nft_genmask_next(net), .set = set, .key = elem->key.val.data, }; @@ -140,20 +140,20 @@ static int nft_hash_insert(const struct nft_set *set, nft_hash_params); } -static void nft_hash_activate(const struct nft_set *set, +static void nft_hash_activate(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) { struct nft_hash_elem *he = elem->priv; - nft_set_elem_change_active(set, &he->ext); + nft_set_elem_change_active(net, set, &he->ext); nft_set_elem_clear_busy(&he->ext); } -static void *nft_hash_deactivate(const struct nft_set *set, +static void *nft_hash_deactivate(const struct net *net, + const struct nft_set *set, const struct nft_set_elem *elem) { struct nft_hash *priv = nft_set_priv(set); - struct net *net = read_pnet(&set->pnet); struct nft_hash_elem *he; struct nft_hash_cmp_arg arg = { .genmask = nft_genmask_next(net), @@ -166,7 +166,7 @@ static void *nft_hash_deactivate(const struct nft_set *set, if (he != NULL) { if (!nft_set_elem_mark_busy(&he->ext) || !nft_is_active(net, &he->ext)) - nft_set_elem_change_active(set, &he->ext); + nft_set_elem_change_active(net, set, &he->ext); else he = NULL; } diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index b8d18f598569..e164325d1bc0 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -35,7 +35,7 @@ static void nft_lookup_eval(const struct nft_expr *expr, const struct nft_set_ext *ext; bool found; - found = set->ops->lookup(set, ®s->data[priv->sreg], &ext) ^ + found = set->ops->lookup(pkt->net, set, ®s->data[priv->sreg], &ext) ^ priv->invert; if (!found) { diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c index c0f638745adc..6473936d05c6 100644 --- a/net/netfilter/nft_rbtree.c +++ b/net/netfilter/nft_rbtree.c @@ -41,13 +41,13 @@ static bool nft_rbtree_equal(const struct nft_set *set, const void *this, return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0; } -static bool nft_rbtree_lookup(const struct nft_set *set, const u32 *key, - const struct nft_set_ext **ext) +static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set, + const u32 *key, const struct nft_set_ext **ext) { const struct nft_rbtree *priv = nft_set_priv(set); const struct nft_rbtree_elem *rbe, *interval = NULL; + u8 genmask = nft_genmask_cur(net); const struct rb_node *parent; - u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); const void *this; int d; @@ -93,13 +93,13 @@ out: return false; } -static int __nft_rbtree_insert(const struct nft_set *set, +static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, struct nft_rbtree_elem *new) { struct nft_rbtree *priv = nft_set_priv(set); + u8 genmask = nft_genmask_next(net); struct nft_rbtree_elem *rbe; struct rb_node *parent, **p; - u8 genmask = nft_genmask_next(read_pnet(&set->pnet)); int d; parent = NULL; @@ -132,14 +132,14 @@ static int __nft_rbtree_insert(const struct nft_set *set, return 0; } -static int nft_rbtree_insert(const struct nft_set *set, +static int nft_rbtree_insert(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) { struct nft_rbtree_elem *rbe = elem->priv; int err; spin_lock_bh(&nft_rbtree_lock); - err = __nft_rbtree_insert(set, rbe); + err = __nft_rbtree_insert(net, set, rbe); spin_unlock_bh(&nft_rbtree_lock); return err; @@ -156,21 +156,23 @@ static void nft_rbtree_remove(const struct nft_set *set, spin_unlock_bh(&nft_rbtree_lock); } -static void nft_rbtree_activate(const struct nft_set *set, +static void nft_rbtree_activate(const struct net *net, + const struct nft_set *set, const struct nft_set_elem *elem) { struct nft_rbtree_elem *rbe = elem->priv; - nft_set_elem_change_active(set, &rbe->ext); + nft_set_elem_change_active(net, set, &rbe->ext); } -static void *nft_rbtree_deactivate(const struct nft_set *set, +static void *nft_rbtree_deactivate(const struct net *net, + const struct nft_set *set, const struct nft_set_elem *elem) { const struct nft_rbtree *priv = nft_set_priv(set); const struct rb_node *parent = priv->root.rb_node; struct nft_rbtree_elem *rbe, *this = elem->priv; - u8 genmask = nft_genmask_next(read_pnet(&set->pnet)); + u8 genmask = nft_genmask_next(net); int d; while (parent != NULL) { @@ -196,7 +198,7 @@ static void *nft_rbtree_deactivate(const struct nft_set *set, parent = parent->rb_right; continue; } - nft_set_elem_change_active(set, &rbe->ext); + nft_set_elem_change_active(net, set, &rbe->ext); return rbe; } } -- cgit v1.2.3 From c2b9b4fee8ab86f2bb657e5ac48d803879e92765 Mon Sep 17 00:00:00 2001 From: Toby DiPasquale Date: Mon, 11 Jul 2016 11:32:45 +0100 Subject: netfilter: nf_conntrack_h323: fix off-by-one in DecodeQ931 This patch corrects an off-by-one error in the DecodeQ931 function in the nf_conntrack_h323 module. This error could result in reading off the end of a Q.931 frame. Signed-off-by: Toby DiPasquale Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_h323_asn1.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c index bcd5ed6b7130..89b2e46925c4 100644 --- a/net/netfilter/nf_conntrack_h323_asn1.c +++ b/net/netfilter/nf_conntrack_h323_asn1.c @@ -846,9 +846,10 @@ int DecodeQ931(unsigned char *buf, size_t sz, Q931 *q931) sz -= len; /* Message Type */ - if (sz < 1) + if (sz < 2) return H323_ERROR_BOUND; q931->MessageType = *p++; + sz--; PRINT("MessageType = %02X\n", q931->MessageType); if (*p & 0x80) { p++; -- cgit v1.2.3 From 3101e0fc1f6e809d38fbb5845c6c5eb0eefeda07 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Tue, 12 Jul 2016 19:45:00 +0800 Subject: netfilter: conntrack: protect early_drop by rcu read lock User can add ct entry via nfnetlink(IPCTNL_MSG_CT_NEW), and if the total number reach the nf_conntrack_max, we will try to drop some ct entries. But in this case(the main function call path is ctnetlink_create_conntrack -> nf_conntrack_alloc -> early_drop), rcu_read_lock is not held, so race with hash resize will happen. Fixes: 242922a02717 ("netfilter: conntrack: simplify early_drop") Cc: Florian Westphal Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index e0e9c9a0f5ba..2d46225501c1 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -880,6 +880,7 @@ static noinline int early_drop(struct net *net, unsigned int _hash) struct hlist_nulls_head *ct_hash; unsigned hash, sequence, drops; + rcu_read_lock(); do { sequence = read_seqcount_begin(&nf_conntrack_generation); hash = scale_hash(_hash++); @@ -887,6 +888,8 @@ static noinline int early_drop(struct net *net, unsigned int _hash) } while (read_seqcount_retry(&nf_conntrack_generation, sequence)); drops = early_drop_list(net, &ct_hash[hash]); + rcu_read_unlock(); + if (drops) { NF_CT_STAT_ADD_ATOMIC(net, early_drop, drops); return true; -- cgit v1.2.3 From f4dc77713f8016d2e8a3295e1c9c53a21f296def Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 14 Jul 2016 17:51:26 +0200 Subject: netfilter: x_tables: speed up jump target validation The dummy ruleset I used to test the original validation change was broken, most rules were unreachable and were not tested by mark_source_chains(). In some cases rulesets that used to load in a few seconds now require several minutes. sample ruleset that shows the behaviour: echo "*filter" for i in $(seq 0 100000);do printf ":chain_%06x - [0:0]\n" $i done for i in $(seq 0 100000);do printf -- "-A INPUT -j chain_%06x\n" $i printf -- "-A INPUT -j chain_%06x\n" $i printf -- "-A INPUT -j chain_%06x\n" $i done echo COMMIT [ pipe result into iptables-restore ] This ruleset will be about 74mbyte in size, with ~500k searches though all 500k[1] rule entries. iptables-restore will take forever (gave up after 10 minutes) Instead of always searching the entire blob for a match, fill an array with the start offsets of every single ipt_entry struct, then do a binary search to check if the jump target is present or not. After this change ruleset restore times get again close to what one gets when reverting 36472341017529e (~3 seconds on my workstation). [1] every user-defined rule gets an implicit RETURN, so we get 300k jumps + 100k userchains + 100k returns -> 500k rule entries Fixes: 36472341017529e ("netfilter: x_tables: validate targets of jumps") Reported-by: Jeff Wu Tested-by: Jeff Wu Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 4 +++ net/ipv4/netfilter/arp_tables.c | 47 ++++++++++++++++++----------------- net/ipv4/netfilter/ip_tables.c | 45 ++++++++++++++++++---------------- net/ipv6/netfilter/ip6_tables.c | 45 ++++++++++++++++++---------------- net/netfilter/x_tables.c | 50 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 127 insertions(+), 64 deletions(-) (limited to 'net/netfilter') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index e94e81ab2b58..2ad1a2b289b5 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -250,6 +250,10 @@ int xt_check_entry_offsets(const void *base, const char *elems, unsigned int target_offset, unsigned int next_offset); +unsigned int *xt_alloc_entry_offsets(unsigned int size); +bool xt_find_jump_offset(const unsigned int *offsets, + unsigned int target, unsigned int size); + int xt_check_match(struct xt_mtchk_param *, unsigned int size, u_int8_t proto, bool inv_proto); int xt_check_target(struct xt_tgchk_param *, unsigned int size, u_int8_t proto, diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index c8dd9e26b185..b31df597fd37 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -299,23 +299,12 @@ static inline bool unconditional(const struct arpt_entry *e) memcmp(&e->arp, &uncond, sizeof(uncond)) == 0; } -static bool find_jump_target(const struct xt_table_info *t, - const struct arpt_entry *target) -{ - struct arpt_entry *iter; - - xt_entry_foreach(iter, t->entries, t->size) { - if (iter == target) - return true; - } - return false; -} - /* Figures out from what hook each rule can be called: returns 0 if * there are loops. Puts hook bitmask in comefrom. */ static int mark_source_chains(const struct xt_table_info *newinfo, - unsigned int valid_hooks, void *entry0) + unsigned int valid_hooks, void *entry0, + unsigned int *offsets) { unsigned int hook; @@ -388,10 +377,11 @@ static int mark_source_chains(const struct xt_table_info *newinfo, XT_STANDARD_TARGET) == 0 && newpos >= 0) { /* This a jump; chase it. */ + if (!xt_find_jump_offset(offsets, newpos, + newinfo->number)) + return 0; e = (struct arpt_entry *) (entry0 + newpos); - if (!find_jump_target(newinfo, e)) - return 0; } else { /* ... this is a fallthru */ newpos = pos + e->next_offset; @@ -543,6 +533,7 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0, const struct arpt_replace *repl) { struct arpt_entry *iter; + unsigned int *offsets; unsigned int i; int ret = 0; @@ -555,6 +546,9 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0, newinfo->underflow[i] = 0xFFFFFFFF; } + offsets = xt_alloc_entry_offsets(newinfo->number); + if (!offsets) + return -ENOMEM; i = 0; /* Walk through entries, checking offsets. */ @@ -565,17 +559,20 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0, repl->underflow, repl->valid_hooks); if (ret != 0) - break; + goto out_free; + if (i < repl->num_entries) + offsets[i] = (void *)iter - entry0; ++i; if (strcmp(arpt_get_target(iter)->u.user.name, XT_ERROR_TARGET) == 0) ++newinfo->stacksize; } if (ret != 0) - return ret; + goto out_free; + ret = -EINVAL; if (i != repl->num_entries) - return -EINVAL; + goto out_free; /* Check hooks all assigned */ for (i = 0; i < NF_ARP_NUMHOOKS; i++) { @@ -583,13 +580,16 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0, if (!(repl->valid_hooks & (1 << i))) continue; if (newinfo->hook_entry[i] == 0xFFFFFFFF) - return -EINVAL; + goto out_free; if (newinfo->underflow[i] == 0xFFFFFFFF) - return -EINVAL; + goto out_free; } - if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) - return -ELOOP; + if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) { + ret = -ELOOP; + goto out_free; + } + kvfree(offsets); /* Finally, each sanity check must pass */ i = 0; @@ -609,6 +609,9 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0, return ret; } + return ret; + out_free: + kvfree(offsets); return ret; } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index f0df66f54ce6..f993545a3373 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -373,23 +373,12 @@ ipt_do_table(struct sk_buff *skb, else return verdict; } -static bool find_jump_target(const struct xt_table_info *t, - const struct ipt_entry *target) -{ - struct ipt_entry *iter; - - xt_entry_foreach(iter, t->entries, t->size) { - if (iter == target) - return true; - } - return false; -} - /* Figures out from what hook each rule can be called: returns 0 if there are loops. Puts hook bitmask in comefrom. */ static int mark_source_chains(const struct xt_table_info *newinfo, - unsigned int valid_hooks, void *entry0) + unsigned int valid_hooks, void *entry0, + unsigned int *offsets) { unsigned int hook; @@ -458,10 +447,11 @@ mark_source_chains(const struct xt_table_info *newinfo, XT_STANDARD_TARGET) == 0 && newpos >= 0) { /* This a jump; chase it. */ + if (!xt_find_jump_offset(offsets, newpos, + newinfo->number)) + return 0; e = (struct ipt_entry *) (entry0 + newpos); - if (!find_jump_target(newinfo, e)) - return 0; } else { /* ... this is a fallthru */ newpos = pos + e->next_offset; @@ -694,6 +684,7 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, const struct ipt_replace *repl) { struct ipt_entry *iter; + unsigned int *offsets; unsigned int i; int ret = 0; @@ -706,6 +697,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, newinfo->underflow[i] = 0xFFFFFFFF; } + offsets = xt_alloc_entry_offsets(newinfo->number); + if (!offsets) + return -ENOMEM; i = 0; /* Walk through entries, checking offsets. */ xt_entry_foreach(iter, entry0, newinfo->size) { @@ -715,15 +709,18 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, repl->underflow, repl->valid_hooks); if (ret != 0) - return ret; + goto out_free; + if (i < repl->num_entries) + offsets[i] = (void *)iter - entry0; ++i; if (strcmp(ipt_get_target(iter)->u.user.name, XT_ERROR_TARGET) == 0) ++newinfo->stacksize; } + ret = -EINVAL; if (i != repl->num_entries) - return -EINVAL; + goto out_free; /* Check hooks all assigned */ for (i = 0; i < NF_INET_NUMHOOKS; i++) { @@ -731,13 +728,16 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, if (!(repl->valid_hooks & (1 << i))) continue; if (newinfo->hook_entry[i] == 0xFFFFFFFF) - return -EINVAL; + goto out_free; if (newinfo->underflow[i] == 0xFFFFFFFF) - return -EINVAL; + goto out_free; } - if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) - return -ELOOP; + if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) { + ret = -ELOOP; + goto out_free; + } + kvfree(offsets); /* Finally, each sanity check must pass */ i = 0; @@ -757,6 +757,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, return ret; } + return ret; + out_free: + kvfree(offsets); return ret; } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 61ed95054efa..552fac2f390a 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -402,23 +402,12 @@ ip6t_do_table(struct sk_buff *skb, else return verdict; } -static bool find_jump_target(const struct xt_table_info *t, - const struct ip6t_entry *target) -{ - struct ip6t_entry *iter; - - xt_entry_foreach(iter, t->entries, t->size) { - if (iter == target) - return true; - } - return false; -} - /* Figures out from what hook each rule can be called: returns 0 if there are loops. Puts hook bitmask in comefrom. */ static int mark_source_chains(const struct xt_table_info *newinfo, - unsigned int valid_hooks, void *entry0) + unsigned int valid_hooks, void *entry0, + unsigned int *offsets) { unsigned int hook; @@ -487,10 +476,11 @@ mark_source_chains(const struct xt_table_info *newinfo, XT_STANDARD_TARGET) == 0 && newpos >= 0) { /* This a jump; chase it. */ + if (!xt_find_jump_offset(offsets, newpos, + newinfo->number)) + return 0; e = (struct ip6t_entry *) (entry0 + newpos); - if (!find_jump_target(newinfo, e)) - return 0; } else { /* ... this is a fallthru */ newpos = pos + e->next_offset; @@ -724,6 +714,7 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, const struct ip6t_replace *repl) { struct ip6t_entry *iter; + unsigned int *offsets; unsigned int i; int ret = 0; @@ -736,6 +727,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, newinfo->underflow[i] = 0xFFFFFFFF; } + offsets = xt_alloc_entry_offsets(newinfo->number); + if (!offsets) + return -ENOMEM; i = 0; /* Walk through entries, checking offsets. */ xt_entry_foreach(iter, entry0, newinfo->size) { @@ -745,15 +739,18 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, repl->underflow, repl->valid_hooks); if (ret != 0) - return ret; + goto out_free; + if (i < repl->num_entries) + offsets[i] = (void *)iter - entry0; ++i; if (strcmp(ip6t_get_target(iter)->u.user.name, XT_ERROR_TARGET) == 0) ++newinfo->stacksize; } + ret = -EINVAL; if (i != repl->num_entries) - return -EINVAL; + goto out_free; /* Check hooks all assigned */ for (i = 0; i < NF_INET_NUMHOOKS; i++) { @@ -761,13 +758,16 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, if (!(repl->valid_hooks & (1 << i))) continue; if (newinfo->hook_entry[i] == 0xFFFFFFFF) - return -EINVAL; + goto out_free; if (newinfo->underflow[i] == 0xFFFFFFFF) - return -EINVAL; + goto out_free; } - if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) - return -ELOOP; + if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) { + ret = -ELOOP; + goto out_free; + } + kvfree(offsets); /* Finally, each sanity check must pass */ i = 0; @@ -787,6 +787,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, return ret; } + return ret; + out_free: + kvfree(offsets); return ret; } diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index fe0e2db632c7..e0aa7c1d0224 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -702,6 +702,56 @@ int xt_check_entry_offsets(const void *base, } EXPORT_SYMBOL(xt_check_entry_offsets); +/** + * xt_alloc_entry_offsets - allocate array to store rule head offsets + * + * @size: number of entries + * + * Return: NULL or kmalloc'd or vmalloc'd array + */ +unsigned int *xt_alloc_entry_offsets(unsigned int size) +{ + unsigned int *off; + + off = kcalloc(size, sizeof(unsigned int), GFP_KERNEL | __GFP_NOWARN); + + if (off) + return off; + + if (size < (SIZE_MAX / sizeof(unsigned int))) + off = vmalloc(size * sizeof(unsigned int)); + + return off; +} +EXPORT_SYMBOL(xt_alloc_entry_offsets); + +/** + * xt_find_jump_offset - check if target is a valid jump offset + * + * @offsets: array containing all valid rule start offsets of a rule blob + * @target: the jump target to search for + * @size: entries in @offset + */ +bool xt_find_jump_offset(const unsigned int *offsets, + unsigned int target, unsigned int size) +{ + int m, low = 0, hi = size; + + while (hi > low) { + m = (low + hi) / 2u; + + if (offsets[m] > target) + hi = m; + else if (offsets[m] < target) + low = m + 1; + else + return true; + } + + return false; +} +EXPORT_SYMBOL(xt_find_jump_offset); + int xt_check_target(struct xt_tgchk_param *par, unsigned int size, u_int8_t proto, bool inv_proto) { -- cgit v1.2.3 From 590025a27fe0603e855a054c4ad57d966bd8af07 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 16 Jul 2016 14:27:21 +0800 Subject: netfilter: nft_ct: fix unpaired nf_connlabels_get/put call We only get nf_connlabels if the user add ct label set expr successfully, but we will also put nf_connlabels if the user delete ct lable get expr. This is mismathced, and will cause ct label expr cannot work properly. Also, if we init something fail, we should put nf_connlabels back. Otherwise, we may waste to alloc the memory that will never be used. Signed-off-by: Liping Zhang Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_ct.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 7ce8fd7ace78..d9e44ca34055 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -366,6 +366,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_ct *priv = nft_expr_priv(expr); + bool label_got = false; unsigned int len; int err; @@ -384,6 +385,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, err = nf_connlabels_get(ctx->net, (len * BITS_PER_BYTE) - 1); if (err) return err; + label_got = true; break; #endif default: @@ -393,17 +395,28 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, priv->sreg = nft_parse_register(tb[NFTA_CT_SREG]); err = nft_validate_register_load(priv->sreg, len); if (err < 0) - return err; + goto err1; err = nft_ct_l3proto_try_module_get(ctx->afi->family); if (err < 0) - return err; + goto err1; return 0; + +err1: + if (label_got) + nf_connlabels_put(ctx->net); + return err; +} + +static void nft_ct_get_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + nft_ct_l3proto_module_put(ctx->afi->family); } -static void nft_ct_destroy(const struct nft_ctx *ctx, - const struct nft_expr *expr) +static void nft_ct_set_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) { struct nft_ct *priv = nft_expr_priv(expr); @@ -475,7 +488,7 @@ static const struct nft_expr_ops nft_ct_get_ops = { .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)), .eval = nft_ct_get_eval, .init = nft_ct_get_init, - .destroy = nft_ct_destroy, + .destroy = nft_ct_get_destroy, .dump = nft_ct_get_dump, }; @@ -484,7 +497,7 @@ static const struct nft_expr_ops nft_ct_set_ops = { .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)), .eval = nft_ct_set_eval, .init = nft_ct_set_init, - .destroy = nft_ct_destroy, + .destroy = nft_ct_set_destroy, .dump = nft_ct_set_dump, }; -- cgit v1.2.3 From 82de0be6862cdca2e6802267bda57cfc8844d3a7 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Mon, 18 Jul 2016 11:39:23 +0800 Subject: netfilter: Add helper array register/unregister functions Add nf_ct_helper_init(), nf_conntrack_helpers_register() and nf_conntrack_helpers_unregister() functions to avoid repetitive opencoded initialization in helpers. This patch keeps an id parameter for nf_ct_helper_init() not to break helper matching by name that has been inconsistently exposed to userspace through ports, eg. ftp-2121, and through an incremental id, eg. tftp-1. Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_helper.h | 15 ++++++ net/netfilter/nf_conntrack_ftp.c | 58 +++++++--------------- net/netfilter/nf_conntrack_helper.c | 57 ++++++++++++++++++++++ net/netfilter/nf_conntrack_irc.c | 36 +++++--------- net/netfilter/nf_conntrack_sane.c | 57 ++++++++-------------- net/netfilter/nf_conntrack_sip.c | 75 +++++++++++------------------ net/netfilter/nf_conntrack_tftp.c | 48 ++++++------------ 7 files changed, 165 insertions(+), 181 deletions(-) (limited to 'net/netfilter') diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 6cf614bc0029..1eaac1f4cd6a 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -58,10 +58,25 @@ struct nf_conntrack_helper *__nf_conntrack_helper_find(const char *name, struct nf_conntrack_helper *nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum); +void nf_ct_helper_init(struct nf_conntrack_helper *helper, + u16 l3num, u16 protonum, const char *name, + u16 default_port, u16 spec_port, u32 id, + const struct nf_conntrack_expect_policy *exp_pol, + u32 expect_class_max, u32 data_len, + int (*help)(struct sk_buff *skb, unsigned int protoff, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo), + int (*from_nlattr)(struct nlattr *attr, + struct nf_conn *ct), + struct module *module); int nf_conntrack_helper_register(struct nf_conntrack_helper *); void nf_conntrack_helper_unregister(struct nf_conntrack_helper *); +int nf_conntrack_helpers_register(struct nf_conntrack_helper *, unsigned int); +void nf_conntrack_helpers_unregister(struct nf_conntrack_helper *, + unsigned int); + struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, struct nf_conntrack_helper *helper, gfp_t gfp); diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 19efeba02abb..43147005bea3 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -572,7 +572,7 @@ static int nf_ct_ftp_from_nlattr(struct nlattr *attr, struct nf_conn *ct) return 0; } -static struct nf_conntrack_helper ftp[MAX_PORTS][2] __read_mostly; +static struct nf_conntrack_helper ftp[MAX_PORTS * 2] __read_mostly; static const struct nf_conntrack_expect_policy ftp_exp_policy = { .max_expected = 1, @@ -582,24 +582,13 @@ static const struct nf_conntrack_expect_policy ftp_exp_policy = { /* don't make this __exit, since it's called from __init ! */ static void nf_conntrack_ftp_fini(void) { - int i, j; - for (i = 0; i < ports_c; i++) { - for (j = 0; j < 2; j++) { - if (ftp[i][j].me == NULL) - continue; - - pr_debug("unregistering helper for pf: %d port: %d\n", - ftp[i][j].tuple.src.l3num, ports[i]); - nf_conntrack_helper_unregister(&ftp[i][j]); - } - } - + nf_conntrack_helpers_unregister(ftp, ports_c * 2); kfree(ftp_buffer); } static int __init nf_conntrack_ftp_init(void) { - int i, j = -1, ret = 0; + int i, ret = 0; ftp_buffer = kmalloc(65536, GFP_KERNEL); if (!ftp_buffer) @@ -611,32 +600,21 @@ static int __init nf_conntrack_ftp_init(void) /* FIXME should be configurable whether IPv4 and IPv6 FTP connections are tracked or not - YK */ for (i = 0; i < ports_c; i++) { - ftp[i][0].tuple.src.l3num = PF_INET; - ftp[i][1].tuple.src.l3num = PF_INET6; - for (j = 0; j < 2; j++) { - ftp[i][j].data_len = sizeof(struct nf_ct_ftp_master); - ftp[i][j].tuple.src.u.tcp.port = htons(ports[i]); - ftp[i][j].tuple.dst.protonum = IPPROTO_TCP; - ftp[i][j].expect_policy = &ftp_exp_policy; - ftp[i][j].me = THIS_MODULE; - ftp[i][j].help = help; - ftp[i][j].from_nlattr = nf_ct_ftp_from_nlattr; - if (ports[i] == FTP_PORT) - sprintf(ftp[i][j].name, "ftp"); - else - sprintf(ftp[i][j].name, "ftp-%d", ports[i]); - - pr_debug("registering helper for pf: %d port: %d\n", - ftp[i][j].tuple.src.l3num, ports[i]); - ret = nf_conntrack_helper_register(&ftp[i][j]); - if (ret) { - pr_err("failed to register helper for pf: %d port: %d\n", - ftp[i][j].tuple.src.l3num, ports[i]); - ports_c = i; - nf_conntrack_ftp_fini(); - return ret; - } - } + nf_ct_helper_init(&ftp[2 * i], AF_INET, IPPROTO_TCP, "ftp", + FTP_PORT, ports[i], ports[i], &ftp_exp_policy, + 0, sizeof(struct nf_ct_ftp_master), help, + nf_ct_ftp_from_nlattr, THIS_MODULE); + nf_ct_helper_init(&ftp[2 * i + 1], AF_INET6, IPPROTO_TCP, "ftp", + FTP_PORT, ports[i], ports[i], &ftp_exp_policy, + 0, sizeof(struct nf_ct_ftp_master), help, + nf_ct_ftp_from_nlattr, THIS_MODULE); + } + + ret = nf_conntrack_helpers_register(ftp, ports_c * 2); + if (ret < 0) { + pr_err("failed to register helpers\n"); + kfree(ftp_buffer); + return ret; } return 0; diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index a4294e949cdc..b989b81ac156 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -465,6 +465,63 @@ restart: } EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); +void nf_ct_helper_init(struct nf_conntrack_helper *helper, + u16 l3num, u16 protonum, const char *name, + u16 default_port, u16 spec_port, u32 id, + const struct nf_conntrack_expect_policy *exp_pol, + u32 expect_class_max, u32 data_len, + int (*help)(struct sk_buff *skb, unsigned int protoff, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo), + int (*from_nlattr)(struct nlattr *attr, + struct nf_conn *ct), + struct module *module) +{ + helper->tuple.src.l3num = l3num; + helper->tuple.dst.protonum = protonum; + helper->tuple.src.u.all = htons(spec_port); + helper->expect_policy = exp_pol; + helper->expect_class_max = expect_class_max; + helper->data_len = data_len; + helper->help = help; + helper->from_nlattr = from_nlattr; + helper->me = module; + + if (spec_port == default_port) + snprintf(helper->name, sizeof(helper->name), "%s", name); + else + snprintf(helper->name, sizeof(helper->name), "%s-%u", name, id); +} +EXPORT_SYMBOL_GPL(nf_ct_helper_init); + +int nf_conntrack_helpers_register(struct nf_conntrack_helper *helper, + unsigned int n) +{ + unsigned int i; + int err = 0; + + for (i = 0; i < n; i++) { + err = nf_conntrack_helper_register(&helper[i]); + if (err < 0) + goto err; + } + + return err; +err: + if (i > 0) + nf_conntrack_helpers_unregister(helper, i); + return err; +} +EXPORT_SYMBOL_GPL(nf_conntrack_helpers_register); + +void nf_conntrack_helpers_unregister(struct nf_conntrack_helper *helper, + unsigned int n) +{ + while (n-- > 0) + nf_conntrack_helper_unregister(&helper[n]); +} +EXPORT_SYMBOL_GPL(nf_conntrack_helpers_unregister); + static struct nf_ct_ext_type helper_extend __read_mostly = { .len = sizeof(struct nf_conn_help), .align = __alignof__(struct nf_conn_help), diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index f97ac61d2536..1972a149f958 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -255,27 +255,18 @@ static int __init nf_conntrack_irc_init(void) ports[ports_c++] = IRC_PORT; for (i = 0; i < ports_c; i++) { - irc[i].tuple.src.l3num = AF_INET; - irc[i].tuple.src.u.tcp.port = htons(ports[i]); - irc[i].tuple.dst.protonum = IPPROTO_TCP; - irc[i].expect_policy = &irc_exp_policy; - irc[i].me = THIS_MODULE; - irc[i].help = help; - - if (ports[i] == IRC_PORT) - sprintf(irc[i].name, "irc"); - else - sprintf(irc[i].name, "irc-%u", i); - - ret = nf_conntrack_helper_register(&irc[i]); - if (ret) { - pr_err("failed to register helper for pf: %u port: %u\n", - irc[i].tuple.src.l3num, ports[i]); - ports_c = i; - nf_conntrack_irc_fini(); - return ret; - } + nf_ct_helper_init(&irc[i], AF_INET, IPPROTO_TCP, "irc", + IRC_PORT, ports[i], i, &irc_exp_policy, + 0, 0, help, NULL, THIS_MODULE); + } + + ret = nf_conntrack_helpers_register(&irc[0], ports_c); + if (ret) { + pr_err("failed to register helpers\n"); + kfree(irc_buffer); + return ret; } + return 0; } @@ -283,10 +274,7 @@ static int __init nf_conntrack_irc_init(void) * it is needed by the init function */ static void nf_conntrack_irc_fini(void) { - int i; - - for (i = 0; i < ports_c; i++) - nf_conntrack_helper_unregister(&irc[i]); + nf_conntrack_helpers_unregister(irc, ports_c); kfree(irc_buffer); } diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index 3fcbaab83b3d..9dcb9ee9b97d 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -166,7 +166,7 @@ out: return ret; } -static struct nf_conntrack_helper sane[MAX_PORTS][2] __read_mostly; +static struct nf_conntrack_helper sane[MAX_PORTS * 2] __read_mostly; static const struct nf_conntrack_expect_policy sane_exp_policy = { .max_expected = 1, @@ -176,22 +176,13 @@ static const struct nf_conntrack_expect_policy sane_exp_policy = { /* don't make this __exit, since it's called from __init ! */ static void nf_conntrack_sane_fini(void) { - int i, j; - - for (i = 0; i < ports_c; i++) { - for (j = 0; j < 2; j++) { - pr_debug("unregistering helper for pf: %d port: %d\n", - sane[i][j].tuple.src.l3num, ports[i]); - nf_conntrack_helper_unregister(&sane[i][j]); - } - } - + nf_conntrack_helpers_unregister(sane, ports_c * 2); kfree(sane_buffer); } static int __init nf_conntrack_sane_init(void) { - int i, j = -1, ret = 0; + int i, ret = 0; sane_buffer = kmalloc(65536, GFP_KERNEL); if (!sane_buffer) @@ -203,31 +194,23 @@ static int __init nf_conntrack_sane_init(void) /* FIXME should be configurable whether IPv4 and IPv6 connections are tracked or not - YK */ for (i = 0; i < ports_c; i++) { - sane[i][0].tuple.src.l3num = PF_INET; - sane[i][1].tuple.src.l3num = PF_INET6; - for (j = 0; j < 2; j++) { - sane[i][j].data_len = sizeof(struct nf_ct_sane_master); - sane[i][j].tuple.src.u.tcp.port = htons(ports[i]); - sane[i][j].tuple.dst.protonum = IPPROTO_TCP; - sane[i][j].expect_policy = &sane_exp_policy; - sane[i][j].me = THIS_MODULE; - sane[i][j].help = help; - if (ports[i] == SANE_PORT) - sprintf(sane[i][j].name, "sane"); - else - sprintf(sane[i][j].name, "sane-%d", ports[i]); - - pr_debug("registering helper for pf: %d port: %d\n", - sane[i][j].tuple.src.l3num, ports[i]); - ret = nf_conntrack_helper_register(&sane[i][j]); - if (ret) { - pr_err("failed to register helper for pf: %d port: %d\n", - sane[i][j].tuple.src.l3num, ports[i]); - ports_c = i; - nf_conntrack_sane_fini(); - return ret; - } - } + nf_ct_helper_init(&sane[2 * i], AF_INET, IPPROTO_TCP, "sane", + SANE_PORT, ports[i], ports[i], + &sane_exp_policy, 0, + sizeof(struct nf_ct_sane_master), help, NULL, + THIS_MODULE); + nf_ct_helper_init(&sane[2 * i + 1], AF_INET6, IPPROTO_TCP, "sane", + SANE_PORT, ports[i], ports[i], + &sane_exp_policy, 0, + sizeof(struct nf_ct_sane_master), help, NULL, + THIS_MODULE); + } + + ret = nf_conntrack_helpers_register(sane, ports_c * 2); + if (ret < 0) { + pr_err("failed to register helpers\n"); + kfree(sane_buffer); + return ret; } return 0; diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index f72ba5587588..8d9db9d4702b 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1589,7 +1589,7 @@ static int sip_help_udp(struct sk_buff *skb, unsigned int protoff, return process_sip_msg(skb, ct, protoff, dataoff, &dptr, &datalen); } -static struct nf_conntrack_helper sip[MAX_PORTS][4] __read_mostly; +static struct nf_conntrack_helper sip[MAX_PORTS * 4] __read_mostly; static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1] = { [SIP_EXPECT_SIGNALLING] = { @@ -1616,20 +1616,12 @@ static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1 static void nf_conntrack_sip_fini(void) { - int i, j; - - for (i = 0; i < ports_c; i++) { - for (j = 0; j < ARRAY_SIZE(sip[i]); j++) { - if (sip[i][j].me == NULL) - continue; - nf_conntrack_helper_unregister(&sip[i][j]); - } - } + nf_conntrack_helpers_unregister(sip, ports_c * 4); } static int __init nf_conntrack_sip_init(void) { - int i, j, ret; + int i, ret; if (ports_c == 0) ports[ports_c++] = SIP_PORT; @@ -1637,43 +1629,32 @@ static int __init nf_conntrack_sip_init(void) for (i = 0; i < ports_c; i++) { memset(&sip[i], 0, sizeof(sip[i])); - sip[i][0].tuple.src.l3num = AF_INET; - sip[i][0].tuple.dst.protonum = IPPROTO_UDP; - sip[i][0].help = sip_help_udp; - sip[i][1].tuple.src.l3num = AF_INET; - sip[i][1].tuple.dst.protonum = IPPROTO_TCP; - sip[i][1].help = sip_help_tcp; - - sip[i][2].tuple.src.l3num = AF_INET6; - sip[i][2].tuple.dst.protonum = IPPROTO_UDP; - sip[i][2].help = sip_help_udp; - sip[i][3].tuple.src.l3num = AF_INET6; - sip[i][3].tuple.dst.protonum = IPPROTO_TCP; - sip[i][3].help = sip_help_tcp; - - for (j = 0; j < ARRAY_SIZE(sip[i]); j++) { - sip[i][j].data_len = sizeof(struct nf_ct_sip_master); - sip[i][j].tuple.src.u.udp.port = htons(ports[i]); - sip[i][j].expect_policy = sip_exp_policy; - sip[i][j].expect_class_max = SIP_EXPECT_MAX; - sip[i][j].me = THIS_MODULE; - - if (ports[i] == SIP_PORT) - sprintf(sip[i][j].name, "sip"); - else - sprintf(sip[i][j].name, "sip-%u", i); - - pr_debug("port #%u: %u\n", i, ports[i]); + nf_ct_helper_init(&sip[4 * i], AF_INET, IPPROTO_UDP, "sip", + SIP_PORT, ports[i], i, sip_exp_policy, + SIP_EXPECT_MAX, + sizeof(struct nf_ct_sip_master), sip_help_udp, + NULL, THIS_MODULE); + nf_ct_helper_init(&sip[4 * i + 1], AF_INET, IPPROTO_TCP, "sip", + SIP_PORT, ports[i], i, sip_exp_policy, + SIP_EXPECT_MAX, + sizeof(struct nf_ct_sip_master), sip_help_tcp, + NULL, THIS_MODULE); + nf_ct_helper_init(&sip[4 * i + 2], AF_INET6, IPPROTO_UDP, "sip", + SIP_PORT, ports[i], i, sip_exp_policy, + SIP_EXPECT_MAX, + sizeof(struct nf_ct_sip_master), sip_help_udp, + NULL, THIS_MODULE); + nf_ct_helper_init(&sip[4 * i + 3], AF_INET6, IPPROTO_TCP, "sip", + SIP_PORT, ports[i], i, sip_exp_policy, + SIP_EXPECT_MAX, + sizeof(struct nf_ct_sip_master), sip_help_tcp, + NULL, THIS_MODULE); + } - ret = nf_conntrack_helper_register(&sip[i][j]); - if (ret) { - pr_err("failed to register helper for pf: %u port: %u\n", - sip[i][j].tuple.src.l3num, ports[i]); - ports_c = i; - nf_conntrack_sip_fini(); - return ret; - } - } + ret = nf_conntrack_helpers_register(sip, ports_c * 4); + if (ret < 0) { + pr_err("failed to register helpers\n"); + return ret; } return 0; } diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c index 2e65b5430fba..b1227dc6f75e 100644 --- a/net/netfilter/nf_conntrack_tftp.c +++ b/net/netfilter/nf_conntrack_tftp.c @@ -97,7 +97,7 @@ static int tftp_help(struct sk_buff *skb, return ret; } -static struct nf_conntrack_helper tftp[MAX_PORTS][2] __read_mostly; +static struct nf_conntrack_helper tftp[MAX_PORTS * 2] __read_mostly; static const struct nf_conntrack_expect_policy tftp_exp_policy = { .max_expected = 1, @@ -106,47 +106,29 @@ static const struct nf_conntrack_expect_policy tftp_exp_policy = { static void nf_conntrack_tftp_fini(void) { - int i, j; - - for (i = 0; i < ports_c; i++) { - for (j = 0; j < 2; j++) - nf_conntrack_helper_unregister(&tftp[i][j]); - } + nf_conntrack_helpers_unregister(tftp, ports_c * 2); } static int __init nf_conntrack_tftp_init(void) { - int i, j, ret; + int i, ret; if (ports_c == 0) ports[ports_c++] = TFTP_PORT; for (i = 0; i < ports_c; i++) { - memset(&tftp[i], 0, sizeof(tftp[i])); - - tftp[i][0].tuple.src.l3num = AF_INET; - tftp[i][1].tuple.src.l3num = AF_INET6; - for (j = 0; j < 2; j++) { - tftp[i][j].tuple.dst.protonum = IPPROTO_UDP; - tftp[i][j].tuple.src.u.udp.port = htons(ports[i]); - tftp[i][j].expect_policy = &tftp_exp_policy; - tftp[i][j].me = THIS_MODULE; - tftp[i][j].help = tftp_help; - - if (ports[i] == TFTP_PORT) - sprintf(tftp[i][j].name, "tftp"); - else - sprintf(tftp[i][j].name, "tftp-%u", i); - - ret = nf_conntrack_helper_register(&tftp[i][j]); - if (ret) { - pr_err("failed to register helper for pf: %u port: %u\n", - tftp[i][j].tuple.src.l3num, ports[i]); - ports_c = i; - nf_conntrack_tftp_fini(); - return ret; - } - } + nf_ct_helper_init(&tftp[2 * i], AF_INET, IPPROTO_UDP, "tftp", + TFTP_PORT, ports[i], i, &tftp_exp_policy, + 0, 0, tftp_help, NULL, THIS_MODULE); + nf_ct_helper_init(&tftp[2 * i + 1], AF_INET6, IPPROTO_UDP, "tftp", + TFTP_PORT, ports[i], i, &tftp_exp_policy, + 0, 0, tftp_help, NULL, THIS_MODULE); + } + + ret = nf_conntrack_helpers_register(tftp, ports_c * 2); + if (ret < 0) { + pr_err("failed to register helpers\n"); + return ret; } return 0; } -- cgit v1.2.3 From c2d9a4293ced88d7dad7c35c893a31f49f8b64f5 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Mon, 18 Jul 2016 20:44:15 +0800 Subject: netfilter: nft_log: fix possible memory leak if log expr init fail Suppose that we specify the NFTA_LOG_PREFIX, then NFTA_LOG_LEVEL and NFTA_LOG_GROUP are specified together or nf_logger_find_get call returns fail, i.e. expr init fail, memory leak will happen. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_log.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index 713d66837705..e1b34ff0ebd0 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -52,6 +52,14 @@ static int nft_log_init(const struct nft_ctx *ctx, struct nft_log *priv = nft_expr_priv(expr); struct nf_loginfo *li = &priv->loginfo; const struct nlattr *nla; + int err; + + li->type = NF_LOG_TYPE_LOG; + if (tb[NFTA_LOG_LEVEL] != NULL && + tb[NFTA_LOG_GROUP] != NULL) + return -EINVAL; + if (tb[NFTA_LOG_GROUP] != NULL) + li->type = NF_LOG_TYPE_ULOG; nla = tb[NFTA_LOG_PREFIX]; if (nla != NULL) { @@ -63,13 +71,6 @@ static int nft_log_init(const struct nft_ctx *ctx, priv->prefix = (char *)nft_log_null_prefix; } - li->type = NF_LOG_TYPE_LOG; - if (tb[NFTA_LOG_LEVEL] != NULL && - tb[NFTA_LOG_GROUP] != NULL) - return -EINVAL; - if (tb[NFTA_LOG_GROUP] != NULL) - li->type = NF_LOG_TYPE_ULOG; - switch (li->type) { case NF_LOG_TYPE_LOG: if (tb[NFTA_LOG_LEVEL] != NULL) { @@ -96,7 +97,16 @@ static int nft_log_init(const struct nft_ctx *ctx, break; } - return nf_logger_find_get(ctx->afi->family, li->type); + err = nf_logger_find_get(ctx->afi->family, li->type); + if (err < 0) + goto err1; + + return 0; + +err1: + if (priv->prefix != nft_log_null_prefix) + kfree(priv->prefix); + return err; } static void nft_log_destroy(const struct nft_ctx *ctx, -- cgit v1.2.3 From 1bc4e0136cb32282d7968e11cfabc40763fdb03c Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Mon, 18 Jul 2016 20:44:16 +0800 Subject: netfilter: nft_log: check the validity of log level User can specify the log level larger than 7(debug level) via nfnetlink, this is invalid. So in this case, we should report EINVAL to the userspace. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_log.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net/netfilter') diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index e1b34ff0ebd0..5f6f088ff06e 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -79,6 +79,11 @@ static int nft_log_init(const struct nft_ctx *ctx, } else { li->u.log.level = LOGLEVEL_WARNING; } + if (li->u.log.level > LOGLEVEL_DEBUG) { + err = -EINVAL; + goto err1; + } + if (tb[NFTA_LOG_FLAGS] != NULL) { li->u.log.logflags = ntohl(nla_get_be32(tb[NFTA_LOG_FLAGS])); -- cgit v1.2.3 From cc37c1ad42ba6bc07c3d7a999f898e11d69a2580 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Mon, 18 Jul 2016 20:44:17 +0800 Subject: netfilter: nft_log: fix snaplen does not truncate packets There's a similar problem in xt_NFLOG, and was fixed by commit 7643507fe8b5 ("netfilter: xt_NFLOG: nflog-range does not truncate packets"). Only set copy_len here does not work, so we should enable NF_LOG_F_COPY_LEN also. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_log.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index 5f6f088ff06e..24a73bb26e94 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -92,6 +92,7 @@ static int nft_log_init(const struct nft_ctx *ctx, case NF_LOG_TYPE_ULOG: li->u.ulog.group = ntohs(nla_get_be16(tb[NFTA_LOG_GROUP])); if (tb[NFTA_LOG_SNAPLEN] != NULL) { + li->u.ulog.flags |= NF_LOG_F_COPY_LEN; li->u.ulog.copy_len = ntohl(nla_get_be32(tb[NFTA_LOG_SNAPLEN])); } @@ -149,7 +150,7 @@ static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr) if (nla_put_be16(skb, NFTA_LOG_GROUP, htons(li->u.ulog.group))) goto nla_put_failure; - if (li->u.ulog.copy_len) { + if (li->u.ulog.flags & NF_LOG_F_COPY_LEN) { if (nla_put_be32(skb, NFTA_LOG_SNAPLEN, htonl(li->u.ulog.copy_len))) goto nla_put_failure; -- cgit v1.2.3 From 6e1f760e13c75eb0c21c75c6eed918e25b54cd07 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 19 Jul 2016 12:20:45 +0200 Subject: netfilter: nf_tables: allow to filter out rules by table and chain If the table and/or chain attributes are set in a rule dump request, we filter out the rules based on this selection. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 0211eaec9060..13d50e7cfe2f 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1857,10 +1857,16 @@ err: return err; } +struct nft_rule_dump_ctx { + char table[NFT_TABLE_MAXNAMELEN]; + char chain[NFT_CHAIN_MAXNAMELEN]; +}; + static int nf_tables_dump_rules(struct sk_buff *skb, struct netlink_callback *cb) { const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + const struct nft_rule_dump_ctx *ctx = cb->data; const struct nft_af_info *afi; const struct nft_table *table; const struct nft_chain *chain; @@ -1877,7 +1883,15 @@ static int nf_tables_dump_rules(struct sk_buff *skb, continue; list_for_each_entry_rcu(table, &afi->tables, list) { + if (ctx && ctx->table[0] && + strcmp(ctx->table, table->name) != 0) + continue; + list_for_each_entry_rcu(chain, &table->chains, list) { + if (ctx && ctx->chain[0] && + strcmp(ctx->chain, chain->name) != 0) + continue; + list_for_each_entry_rcu(rule, &chain->rules, list) { if (!nft_is_active(net, rule)) goto cont; @@ -1907,6 +1921,12 @@ done: return skb->len; } +static int nf_tables_dump_rules_done(struct netlink_callback *cb) +{ + kfree(cb->data); + return 0; +} + static int nf_tables_getrule(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -1924,7 +1944,25 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk, if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .dump = nf_tables_dump_rules, + .done = nf_tables_dump_rules_done, }; + + if (nla[NFTA_RULE_TABLE] || nla[NFTA_RULE_CHAIN]) { + struct nft_rule_dump_ctx *ctx; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + if (nla[NFTA_RULE_TABLE]) + nla_strlcpy(ctx->table, nla[NFTA_RULE_TABLE], + sizeof(ctx->table)); + if (nla[NFTA_RULE_CHAIN]) + nla_strlcpy(ctx->chain, nla[NFTA_RULE_CHAIN], + sizeof(ctx->chain)); + c.data = ctx; + } + return netlink_dump_start(nlsk, skb, nlh, &c); } -- cgit v1.2.3 From 23014011ba4209a086931ff402eac1c41abbe456 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 21 Jul 2016 12:51:16 +0200 Subject: netfilter: conntrack: support a fixed size of 128 distinct labels The conntrack label extension is currently variable-sized, e.g. if only 2 labels are used by iptables rules then the labels->bits[] array will only contain one element. We track size of each label storage area in the 'words' member. But in nftables and openvswitch we always have to ask for worst-case since we don't know what bit will be used at configuration time. As most arches are 64bit we need to allocate 24 bytes in this case: struct nf_conn_labels { u8 words; /* 0 1 */ /* XXX 7 bytes hole, try to pack */ long unsigned bits[2]; /* 8 24 */ Make bits a fixed size and drop the words member, it simplifies the code and only increases memory requirements on x86 when less than 64bit labels are required. We still only allocate the extension if its needed. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_labels.h | 16 ++++------------ net/netfilter/nf_conntrack_labels.c | 13 +++---------- net/netfilter/nf_conntrack_netlink.c | 10 +++++----- net/netfilter/nft_ct.c | 13 +++---------- net/netfilter/xt_connlabel.c | 2 +- net/openvswitch/conntrack.c | 4 ++-- 6 files changed, 18 insertions(+), 40 deletions(-) (limited to 'net/netfilter') diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h index c5f8fc736b3d..0fd4989de836 100644 --- a/include/net/netfilter/nf_conntrack_labels.h +++ b/include/net/netfilter/nf_conntrack_labels.h @@ -10,8 +10,7 @@ #define NF_CT_LABELS_MAX_SIZE ((XT_CONNLABEL_MAXBIT + 1) / BITS_PER_BYTE) struct nf_conn_labels { - u8 words; - unsigned long bits[]; + unsigned long bits[NF_CT_LABELS_MAX_SIZE / sizeof(long)]; }; static inline struct nf_conn_labels *nf_ct_labels_find(const struct nf_conn *ct) @@ -26,20 +25,13 @@ static inline struct nf_conn_labels *nf_ct_labels_find(const struct nf_conn *ct) static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct) { #ifdef CONFIG_NF_CONNTRACK_LABELS - struct nf_conn_labels *cl_ext; struct net *net = nf_ct_net(ct); - u8 words; - words = ACCESS_ONCE(net->ct.label_words); - if (words == 0) + if (net->ct.labels_used == 0) return NULL; - cl_ext = nf_ct_ext_add_length(ct, NF_CT_EXT_LABELS, - words * sizeof(long), GFP_ATOMIC); - if (cl_ext != NULL) - cl_ext->words = words; - - return cl_ext; + return nf_ct_ext_add_length(ct, NF_CT_EXT_LABELS, + sizeof(struct nf_conn_labels), GFP_ATOMIC); #else return NULL; #endif diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c index 252e6a7cd2f1..7686200f9ace 100644 --- a/net/netfilter/nf_conntrack_labels.c +++ b/net/netfilter/nf_conntrack_labels.c @@ -20,7 +20,7 @@ int nf_connlabel_set(struct nf_conn *ct, u16 bit) { struct nf_conn_labels *labels = nf_ct_labels_find(ct); - if (!labels || BIT_WORD(bit) >= labels->words) + if (!labels) return -ENOSPC; if (test_bit(bit, labels->bits)) @@ -60,7 +60,7 @@ int nf_connlabels_replace(struct nf_conn *ct, if (!labels) return -ENOSPC; - size = labels->words * sizeof(long); + size = sizeof(labels->bits); if (size < (words32 * sizeof(u32))) words32 = size / sizeof(u32); @@ -80,16 +80,11 @@ EXPORT_SYMBOL_GPL(nf_connlabels_replace); int nf_connlabels_get(struct net *net, unsigned int bits) { - size_t words; - - words = BIT_WORD(bits) + 1; - if (words > NF_CT_LABELS_MAX_SIZE / sizeof(long)) + if (BIT_WORD(bits) >= NF_CT_LABELS_MAX_SIZE / sizeof(long)) return -ERANGE; spin_lock(&nf_connlabels_lock); net->ct.labels_used++; - if (words > net->ct.label_words) - net->ct.label_words = words; spin_unlock(&nf_connlabels_lock); return 0; @@ -100,8 +95,6 @@ void nf_connlabels_put(struct net *net) { spin_lock(&nf_connlabels_lock); net->ct.labels_used--; - if (net->ct.labels_used == 0) - net->ct.label_words = 0; spin_unlock(&nf_connlabels_lock); } EXPORT_SYMBOL_GPL(nf_connlabels_put); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index a18d1ceabad5..050bb3420a6b 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -346,25 +346,25 @@ static inline int ctnetlink_label_size(const struct nf_conn *ct) if (!labels) return 0; - return nla_total_size(labels->words * sizeof(long)); + return nla_total_size(sizeof(labels->bits)); } static int ctnetlink_dump_labels(struct sk_buff *skb, const struct nf_conn *ct) { struct nf_conn_labels *labels = nf_ct_labels_find(ct); - unsigned int len, i; + unsigned int i; if (!labels) return 0; - len = labels->words * sizeof(long); i = 0; do { if (labels->bits[i] != 0) - return nla_put(skb, CTA_LABELS, len, labels->bits); + return nla_put(skb, CTA_LABELS, sizeof(labels->bits), + labels->bits); i++; - } while (i < labels->words); + } while (i < ARRAY_SIZE(labels->bits)); return 0; } diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index d9e44ca34055..2f47d5d3ae3b 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -113,18 +113,11 @@ static void nft_ct_get_eval(const struct nft_expr *expr, #ifdef CONFIG_NF_CONNTRACK_LABELS case NFT_CT_LABELS: { struct nf_conn_labels *labels = nf_ct_labels_find(ct); - unsigned int size; - if (!labels) { + if (labels) + memcpy(dest, labels->bits, NF_CT_LABELS_MAX_SIZE); + else memset(dest, 0, NF_CT_LABELS_MAX_SIZE); - return; - } - - size = labels->words * sizeof(long); - memcpy(dest, labels->bits, size); - if (size < NF_CT_LABELS_MAX_SIZE) - memset(((char *) dest) + size, 0, - NF_CT_LABELS_MAX_SIZE - size); return; } #endif diff --git a/net/netfilter/xt_connlabel.c b/net/netfilter/xt_connlabel.c index a79af255561a..c9fba8ade3d5 100644 --- a/net/netfilter/xt_connlabel.c +++ b/net/netfilter/xt_connlabel.c @@ -25,7 +25,7 @@ static bool connlabel_match(const struct nf_conn *ct, u16 bit) if (!labels) return false; - return BIT_WORD(bit) < labels->words && test_bit(bit, labels->bits); + return test_bit(bit, labels->bits); } static bool diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index b4069a90e375..c644c78ed485 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -135,7 +135,7 @@ static void ovs_ct_get_labels(const struct nf_conn *ct, struct nf_conn_labels *cl = ct ? nf_ct_labels_find(ct) : NULL; if (cl) { - size_t len = cl->words * sizeof(long); + size_t len = sizeof(cl->bits); if (len > OVS_CT_LABELS_LEN) len = OVS_CT_LABELS_LEN; @@ -274,7 +274,7 @@ static int ovs_ct_set_labels(struct sk_buff *skb, struct sw_flow_key *key, nf_ct_labels_ext_add(ct); cl = nf_ct_labels_find(ct); } - if (!cl || cl->words * sizeof(long) < OVS_CT_LABELS_LEN) + if (!cl || sizeof(cl->bits) < OVS_CT_LABELS_LEN) return -ENOSPC; err = nf_connlabels_replace(ct, (u32 *)labels, (u32 *)mask, -- cgit v1.2.3 From 857ed310c013fe0d0059f955048dab589fa7a57a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 21 Jul 2016 12:51:17 +0200 Subject: netfilter: connlabels: move set helper to xt_connlabel xt_connlabel is the only user so move it. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_labels.h | 2 -- net/netfilter/nf_conntrack_labels.c | 17 ----------------- net/netfilter/xt_connlabel.c | 29 ++++++++++++++++------------- 3 files changed, 16 insertions(+), 32 deletions(-) (limited to 'net/netfilter') diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h index 0fd4989de836..498814626e28 100644 --- a/include/net/netfilter/nf_conntrack_labels.h +++ b/include/net/netfilter/nf_conntrack_labels.h @@ -37,8 +37,6 @@ static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct) #endif } -int nf_connlabel_set(struct nf_conn *ct, u16 bit); - int nf_connlabels_replace(struct nf_conn *ct, const u32 *data, const u32 *mask, unsigned int words); diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c index 7686200f9ace..bcab8bde7312 100644 --- a/net/netfilter/nf_conntrack_labels.c +++ b/net/netfilter/nf_conntrack_labels.c @@ -16,23 +16,6 @@ static spinlock_t nf_connlabels_lock; -int nf_connlabel_set(struct nf_conn *ct, u16 bit) -{ - struct nf_conn_labels *labels = nf_ct_labels_find(ct); - - if (!labels) - return -ENOSPC; - - if (test_bit(bit, labels->bits)) - return 0; - - if (!test_and_set_bit(bit, labels->bits)) - nf_conntrack_event_cache(IPCT_LABEL, ct); - - return 0; -} -EXPORT_SYMBOL_GPL(nf_connlabel_set); - static int replace_u32(u32 *address, u32 mask, u32 new) { u32 old, tmp; diff --git a/net/netfilter/xt_connlabel.c b/net/netfilter/xt_connlabel.c index c9fba8ade3d5..03d66f1c5e69 100644 --- a/net/netfilter/xt_connlabel.c +++ b/net/netfilter/xt_connlabel.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -18,21 +19,12 @@ MODULE_DESCRIPTION("Xtables: add/match connection trackling labels"); MODULE_ALIAS("ipt_connlabel"); MODULE_ALIAS("ip6t_connlabel"); -static bool connlabel_match(const struct nf_conn *ct, u16 bit) -{ - struct nf_conn_labels *labels = nf_ct_labels_find(ct); - - if (!labels) - return false; - - return test_bit(bit, labels->bits); -} - static bool connlabel_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_connlabel_mtinfo *info = par->matchinfo; enum ip_conntrack_info ctinfo; + struct nf_conn_labels *labels; struct nf_conn *ct; bool invert = info->options & XT_CONNLABEL_OP_INVERT; @@ -40,10 +32,21 @@ connlabel_mt(const struct sk_buff *skb, struct xt_action_param *par) if (ct == NULL || nf_ct_is_untracked(ct)) return invert; - if (info->options & XT_CONNLABEL_OP_SET) - return (nf_connlabel_set(ct, info->bit) == 0) ^ invert; + labels = nf_ct_labels_find(ct); + if (!labels) + return invert; + + if (test_bit(info->bit, labels->bits)) + return !invert; + + if (info->options & XT_CONNLABEL_OP_SET) { + if (!test_and_set_bit(info->bit, labels->bits)) + nf_conntrack_event_cache(IPCT_LABEL, ct); + + return !invert; + } - return connlabel_match(ct, info->bit) ^ invert; + return invert; } static int connlabel_mt_check(const struct xt_mtchk_param *par) -- cgit v1.2.3 From 96d1327ac2e3dc3ac4204fe3656dad0043fc0efd Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Fri, 22 Jul 2016 12:59:15 +0800 Subject: netfilter: h323: Use mod_timer instead of set_expect_timeout Simplify the code without any side effect. The set_expect_timeout is used to modify the timer expired time. It tries to delete timer, and add it again. So we could use mod_timer directly. Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_h323_main.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 9511af04dc81..bb77a97961bf 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -1272,19 +1272,6 @@ static struct nf_conntrack_expect *find_expect(struct nf_conn *ct, return NULL; } -/****************************************************************************/ -static int set_expect_timeout(struct nf_conntrack_expect *exp, - unsigned int timeout) -{ - if (!exp || !del_timer(&exp->timeout)) - return 0; - - exp->timeout.expires = jiffies + timeout * HZ; - add_timer(&exp->timeout); - - return 1; -} - /****************************************************************************/ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, @@ -1486,7 +1473,7 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct, "timeout to %u seconds for", info->timeout); nf_ct_dump_tuple(&exp->tuple); - set_expect_timeout(exp, info->timeout); + mod_timer(&exp->timeout, jiffies + info->timeout * HZ); } spin_unlock_bh(&nf_conntrack_expect_lock); } -- cgit v1.2.3 From 2bf4fade54de995f84d319ae02003609dca450f3 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 23 Jul 2016 16:00:31 +0800 Subject: netfilter: nft_compat: put back match/target module if init fail If the user specify the invalid NFTA_MATCH_INFO/NFTA_TARGET_INFO attr or memory alloc fail, we should call module_put to the related match or target. Otherwise, we cannot remove the module even nobody use it. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_compat.c | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 6228c422c766..2e07cec50ffd 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -634,6 +634,7 @@ nft_match_select_ops(const struct nft_ctx *ctx, struct xt_match *match; char *mt_name; u32 rev, family; + int err; if (tb[NFTA_MATCH_NAME] == NULL || tb[NFTA_MATCH_REV] == NULL || @@ -660,13 +661,17 @@ nft_match_select_ops(const struct nft_ctx *ctx, if (IS_ERR(match)) return ERR_PTR(-ENOENT); - if (match->matchsize > nla_len(tb[NFTA_MATCH_INFO])) - return ERR_PTR(-EINVAL); + if (match->matchsize > nla_len(tb[NFTA_MATCH_INFO])) { + err = -EINVAL; + goto err; + } /* This is the first time we use this match, allocate operations */ nft_match = kzalloc(sizeof(struct nft_xt), GFP_KERNEL); - if (nft_match == NULL) - return ERR_PTR(-ENOMEM); + if (nft_match == NULL) { + err = -ENOMEM; + goto err; + } nft_match->ops.type = &nft_match_type; nft_match->ops.size = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize)); @@ -680,6 +685,9 @@ nft_match_select_ops(const struct nft_ctx *ctx, list_add(&nft_match->head, &nft_match_list); return &nft_match->ops; +err: + module_put(match->me); + return ERR_PTR(err); } static void nft_match_release(void) @@ -717,6 +725,7 @@ nft_target_select_ops(const struct nft_ctx *ctx, struct xt_target *target; char *tg_name; u32 rev, family; + int err; if (tb[NFTA_TARGET_NAME] == NULL || tb[NFTA_TARGET_REV] == NULL || @@ -743,13 +752,17 @@ nft_target_select_ops(const struct nft_ctx *ctx, if (IS_ERR(target)) return ERR_PTR(-ENOENT); - if (target->targetsize > nla_len(tb[NFTA_TARGET_INFO])) - return ERR_PTR(-EINVAL); + if (target->targetsize > nla_len(tb[NFTA_TARGET_INFO])) { + err = -EINVAL; + goto err; + } /* This is the first time we use this target, allocate operations */ nft_target = kzalloc(sizeof(struct nft_xt), GFP_KERNEL); - if (nft_target == NULL) - return ERR_PTR(-ENOMEM); + if (nft_target == NULL) { + err = -ENOMEM; + goto err; + } nft_target->ops.type = &nft_target_type; nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize)); @@ -767,6 +780,9 @@ nft_target_select_ops(const struct nft_ctx *ctx, list_add(&nft_target->head, &nft_target_list); return &nft_target->ops; +err: + module_put(target->me); + return ERR_PTR(err); } static void nft_target_release(void) -- cgit v1.2.3 From 4b512e1c1f8de6b9ceb796ecef8658e0a083cab7 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 23 Jul 2016 16:00:32 +0800 Subject: netfilter: nft_compat: fix crash when related match/target module is removed We "cache" the loaded match/target modules and reuse them, but when the modules are removed, we still point to them. Then we may end up with invalid memory references when using iptables-compat to add rules later. Input the following commands will reproduce the kernel crash: # iptables-compat -A INPUT -j LOG # iptables-compat -D INPUT -j LOG # rmmod xt_LOG # iptables-compat -A INPUT -j LOG BUG: unable to handle kernel paging request at ffffffffa05a9010 IP: [] strcmp+0xe/0x30 Call Trace: [] nft_target_select_ops+0x83/0x1f0 [nft_compat] [] nf_tables_expr_parse+0x147/0x1f0 [nf_tables] [] nf_tables_newrule+0x301/0x810 [nf_tables] [] ? nla_parse+0x20/0x100 [] nfnetlink_rcv+0x33f/0x53d [nfnetlink] [] ? nfnetlink_rcv+0x1fb/0x53d [nfnetlink] [] netlink_unicast+0x178/0x220 [] netlink_sendmsg+0x2fb/0x3a0 [] sock_sendmsg+0x38/0x50 [] ___sys_sendmsg+0x28e/0x2a0 [] ? release_sock+0x1e/0xb0 [] ? _raw_spin_unlock_bh+0x35/0x40 [] ? release_sock+0x82/0xb0 [] __sys_sendmsg+0x54/0x90 [] SyS_sendmsg+0x12/0x20 [] entry_SYSCALL_64_fastpath+0x1a/0xa9 So when nobody use the related match/target module, there's no need to "cache" it. And nft_[match|target]_release are useless anymore, remove them. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_compat.c | 43 ++++++++++++++++++++----------------------- 1 file changed, 20 insertions(+), 23 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 2e07cec50ffd..c21e7eb8dce0 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -23,6 +23,20 @@ #include #include +struct nft_xt { + struct list_head head; + struct nft_expr_ops ops; + unsigned int refcnt; +}; + +static void nft_xt_put(struct nft_xt *xt) +{ + if (--xt->refcnt == 0) { + list_del(&xt->head); + kfree(xt); + } +} + static int nft_compat_chain_validate_dependency(const char *tablename, const struct nft_chain *chain) { @@ -260,6 +274,7 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) if (par.target->destroy != NULL) par.target->destroy(&par); + nft_xt_put(container_of(expr->ops, struct nft_xt, ops)); module_put(target->me); } @@ -442,6 +457,7 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) if (par.match->destroy != NULL) par.match->destroy(&par); + nft_xt_put(container_of(expr->ops, struct nft_xt, ops)); module_put(match->me); } @@ -612,11 +628,6 @@ static const struct nfnetlink_subsystem nfnl_compat_subsys = { static LIST_HEAD(nft_match_list); -struct nft_xt { - struct list_head head; - struct nft_expr_ops ops; -}; - static struct nft_expr_type nft_match_type; static bool nft_match_cmp(const struct xt_match *match, @@ -653,6 +664,7 @@ nft_match_select_ops(const struct nft_ctx *ctx, if (!try_module_get(match->me)) return ERR_PTR(-ENOENT); + nft_match->refcnt++; return &nft_match->ops; } } @@ -673,6 +685,7 @@ nft_match_select_ops(const struct nft_ctx *ctx, goto err; } + nft_match->refcnt = 1; nft_match->ops.type = &nft_match_type; nft_match->ops.size = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize)); nft_match->ops.eval = nft_match_eval; @@ -690,14 +703,6 @@ err: return ERR_PTR(err); } -static void nft_match_release(void) -{ - struct nft_xt *nft_match, *tmp; - - list_for_each_entry_safe(nft_match, tmp, &nft_match_list, head) - kfree(nft_match); -} - static struct nft_expr_type nft_match_type __read_mostly = { .name = "match", .select_ops = nft_match_select_ops, @@ -744,6 +749,7 @@ nft_target_select_ops(const struct nft_ctx *ctx, if (!try_module_get(target->me)) return ERR_PTR(-ENOENT); + nft_target->refcnt++; return &nft_target->ops; } } @@ -764,6 +770,7 @@ nft_target_select_ops(const struct nft_ctx *ctx, goto err; } + nft_target->refcnt = 1; nft_target->ops.type = &nft_target_type; nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize)); nft_target->ops.init = nft_target_init; @@ -785,14 +792,6 @@ err: return ERR_PTR(err); } -static void nft_target_release(void) -{ - struct nft_xt *nft_target, *tmp; - - list_for_each_entry_safe(nft_target, tmp, &nft_target_list, head) - kfree(nft_target); -} - static struct nft_expr_type nft_target_type __read_mostly = { .name = "target", .select_ops = nft_target_select_ops, @@ -835,8 +834,6 @@ static void __exit nft_compat_module_exit(void) nfnetlink_subsys_unregister(&nfnl_compat_subsys); nft_unregister_expr(&nft_target_type); nft_unregister_expr(&nft_match_type); - nft_match_release(); - nft_target_release(); } MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFT_COMPAT); -- cgit v1.2.3